Imported Upstream version 1.9.0 (upstream/1.9.0, submit/tizen/20200905.125700, accepted/tizen/unified/20200906.032650)

author: Chunseok Lee <chunseok.lee@samsung.com> 2020-09-05 21:49:46 +0900
committer: Chunseok Lee <chunseok.lee@samsung.com> 2020-09-05 21:49:46 +0900
commit: 74476a2d0296bdad70a2f7f90bc7419a8b05bffd (patch)
tree: 3f991636c1e9423d38eb16a384c20b569b0d678e /compiler
parent: 042b262b3633b6c0f577aed6cb4b980ad0c1dcf3 (diff)
download: nnfw-74476a2d0296bdad70a2f7f90bc7419a8b05bffd.tar.gz
nnfw-74476a2d0296bdad70a2f7f90bc7419a8b05bffd.tar.bz2
nnfw-74476a2d0296bdad70a2f7f90bc7419a8b05bffd.zip
181 files changed, 7136 insertions, 3280 deletions
diff --git a/compiler/circle-quantizer/CMakeLists.txt b/compiler/circle-quantizer/CMakeLists.txt
index 009bfabea..5075b13d5 100644
--- a/compiler/circle-quantizer/CMakeLists.txt
+++ b/compiler/circle-quantizer/CMakeLists.txt
@@ -1,8 +1,6 @@
-file(GLOB_RECURSE SOURCES "src/*.cpp")
+set (SOURCES src/CircleQuantizer.cpp)
 
 add_executable(circle-quantizer "${SOURCES}")
-target_include_directories(circle-quantizer PRIVATE include)
-target_include_directories(circle-quantizer PRIVATE src)
 target_link_libraries(circle-quantizer foder)
 target_link_libraries(circle-quantizer safemain)
 target_link_libraries(circle-quantizer oops)
diff --git a/compiler/circle-quantizer/include/CircleExpContract.h b/compiler/circle-quantizer/include/CircleExpContract.h
deleted file mode 100644
index e888e4a12..000000000
--- a/compiler/circle-quantizer/include/CircleExpContract.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __CIRCLEQUANTIZER_CIRCLEXPCONTRACT_H__
-#define __CIRCLEQUANTIZER_CIRCLEXPCONTRACT_H__
-
-#include <loco.h>
-#include <luci/CircleExporter.h>
-#include <luci/IR/Module.h>
-
-#include <memory>
-#include <string>
-
-struct CircleExpContract : public luci::CircleExporter::Contract
-{
-public:
-  CircleExpContract(luci::Module *module, const std::string &filename)
-      : _module(module), _filepath(filename)
-  {
-    // NOTHING TO DO
-  }
-  virtual ~CircleExpContract() = default;
-
-public:
-  loco::Graph *graph(void) const final { return nullptr; }
-  luci::Module *module(void) const final { return _module; };
-
-public:
-  bool store(const char *ptr, const size_t size) const final;
-
-private:
-  luci::Module *_module;
-  const std::string _filepath;
-};
-
-#endif // __CIRCLEQUANTIZER_CIRCLEXPCONTRACT_H__
diff --git a/compiler/circle-quantizer/src/CircleQuantizer.cpp b/compiler/circle-quantizer/src/CircleQuantizer.cpp
index 8d3a80c91..54b38a170 100644
--- a/compiler/circle-quantizer/src/CircleQuantizer.cpp
+++ b/compiler/circle-quantizer/src/CircleQuantizer.cpp
@@ -14,14 +14,13 @@
  * limitations under the License.
  */
 
-#include "CircleExpContract.h"
-
 #include <foder/FileLoader.h>
 
 #include <luci/Importer.h>
 #include <luci/CircleOptimizer.h>
 #include <luci/Service/Validate.h>
 #include <luci/CircleExporter.h>
+#include <luci/CircleFileExpContract.h>
 
 #include <oops/InternalExn.h>
 #include <arser/arser.h>
@@ -37,6 +36,14 @@ using OptionHook = std::function<int(const char **)>;
 using Algorithms = luci::CircleOptimizer::Options::Algorithm;
 using AlgorithmParameters = luci::CircleOptimizer::Options::AlgorithmParameters;
 
+void print_exclusive_options(void)
+{
+  std::cout << "Use only one of the 3 options below." << std::endl;
+  std::cout << "    --quantize_dequantize_weights" << std::endl;
+  std::cout << "    --quantize_with_minmax" << std::endl;
+  std::cout << "    --requantize" << std::endl;
+}
+
 void print_version(void)
 {
   std::cout << "circle-quantizer version " << vconone::get_string() << std::endl;
@@ -53,6 +60,7 @@ int entry(int argc, char **argv)
 
   const std::string qdqw = "--quantize_dequantize_weights";
   const std::string qwmm = "--quantize_with_minmax";
+  const std::string rq = "--requantize";
 
   arser::Arser arser("circle-quantizer provides circle model quantization");
 
@@ -79,6 +87,14 @@ int entry(int argc, char **argv)
             "Three arguments required: input_dtype(float32) "
             "output_dtype(uint8) granularity(layer, channel)");
 
+  arser.add_argument(rq)
+      .nargs(2)
+      .type(arser::DataType::STR_VEC)
+      .required(false)
+      .help("Requantize a quantized model. "
+            "Two arguments required: input_dtype(int8) "
+            "output_dtype(uint8)");
+
   arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
   arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
 
@@ -95,6 +111,11 @@ int entry(int argc, char **argv)
 
   if (arser[qdqw])
   {
+    if (arser[qwmm] || arser[rq])
+    {
+      print_exclusive_options();
+      return 255;
+    }
     auto values = arser.get<std::vector<std::string>>(qdqw);
     if (values.size() != 3)
     {
@@ -110,6 +131,11 @@ int entry(int argc, char **argv)
 
   if (arser[qwmm])
   {
+    if (arser[qdqw] || arser[rq])
+    {
+      print_exclusive_options();
+      return 255;
+    }
     auto values = arser.get<std::vector<std::string>>(qwmm);
     if (values.size() != 3)
     {
@@ -123,12 +149,40 @@ int entry(int argc, char **argv)
     options->param(AlgorithmParameters::Quantize_granularity, values.at(2));
   }
 
+  if (arser[rq])
+  {
+    if (arser[qwmm] || arser[qdqw])
+    {
+      print_exclusive_options();
+      return 255;
+    }
+    auto values = arser.get<std::vector<std::string>>(rq);
+    if (values.size() != 2)
+    {
+      std::cerr << arser;
+      return 255;
+    }
+    options->enable(Algorithms::Requantize);
+
+    options->param(AlgorithmParameters::Quantize_input_dtype, values.at(0));
+    options->param(AlgorithmParameters::Quantize_output_dtype, values.at(1));
+  }
+
   std::string input_path = arser.get<std::string>("input");
   std::string output_path = arser.get<std::string>("output");
 
   // Load model from the file
   foder::FileLoader file_loader{input_path};
   std::vector<char> model_data = file_loader.load();
+
+  // Verify flatbuffers
+  flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()};
+  if (!circle::VerifyModelBuffer(verifier))
+  {
+    std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl;
+    return EXIT_FAILURE;
+  }
+
   const circle::Model *circle_model = circle::GetModel(model_data.data());
   if (circle_model == nullptr)
   {
@@ -157,7 +211,7 @@ int entry(int argc, char **argv)
   // Export to output Circle file
   luci::CircleExporter exporter;
 
-  CircleExpContract contract(module.get(), output_path);
+  luci::CircleFileExpContract contract(module.get(), output_path);
 
   if (!exporter.invoke(&contract))
   {
diff --git a/compiler/circle2circle-dredd-recipe-test/test.lst b/compiler/circle2circle-dredd-recipe-test/test.lst
index 6328a64db..302c3a796 100644
--- a/compiler/circle2circle-dredd-recipe-test/test.lst
+++ b/compiler/circle2circle-dredd-recipe-test/test.lst
@@ -10,6 +10,7 @@
 
 ## TFLITE RECIPE
 
+Add(Net_TConv_BN_000 PASS fuse_batchnorm_with_tconv)
 Add(Net_InstanceNorm_001 PASS fuse_instnorm)
 Add(Net_InstanceNorm_002 PASS fuse_instnorm)
 Add(BatchMatMulV2_000 PASS resolve_customop_batchmatmul)
diff --git a/compiler/circle2circle/include/CircleExpContract.h b/compiler/circle2circle/include/CircleExpContract.h
deleted file mode 100644
index 313b16d22..000000000
--- a/compiler/circle2circle/include/CircleExpContract.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __CIRCLE2CIRCLE_CIRCLEXPCONTRACT_H__
-#define __CIRCLE2CIRCLE_CIRCLEXPCONTRACT_H__
-
-#include <loco.h>
-#include <luci/CircleExporter.h>
-#include <luci/IR/Module.h>
-#include <mio/circle/schema_generated.h>
-
-#include <memory>
-#include <string>
-
-struct CircleExpContract : public luci::CircleExporter::Contract
-{
-public:
-  CircleExpContract(luci::Module *module, const std::string &filename)
-      : _module(module), _filepath(filename)
-  {
-    // NOTHING TO DO
-  }
-  virtual ~CircleExpContract() = default;
-
-public:
-  loco::Graph *graph(void) const final { return nullptr; }
-  luci::Module *module(void) const final { return _module; };
-
-public:
-  bool store(const char *ptr, const size_t size) const final;
-
-private:
-  luci::Module *_module;
-  const std::string _filepath;
-};
-
-#endif // __CIRCLE2CIRCLE_CIRCLEXPCONTRACT_H__
diff --git a/compiler/circle2circle/src/Circle2Circle.cpp b/compiler/circle2circle/src/Circle2Circle.cpp
index 849597b46..39ceade3a 100644
--- a/compiler/circle2circle/src/Circle2Circle.cpp
+++ b/compiler/circle2circle/src/Circle2Circle.cpp
@@ -14,14 +14,13 @@
  * limitations under the License.
  */
 
-#include "CircleExpContract.h"
-
 #include <foder/FileLoader.h>
 
 #include <luci/Importer.h>
 #include <luci/CircleOptimizer.h>
 #include <luci/Service/Validate.h>
 #include <luci/CircleExporter.h>
+#include <luci/CircleFileExpContract.h>
 #include <luci/UserSettings.h>
 
 #include <oops/InternalExn.h>
@@ -61,6 +60,12 @@ int entry(int argc, char **argv)
   arser.add_argument("--all").nargs(0).required(false).default_value(false).help(
       "Enable all optimize options");
 
+  arser.add_argument("--fuse_batchnorm_with_tconv")
+      .nargs(0)
+      .required(false)
+      .default_value(false)
+      .help("This will fuse BatchNorm operators to Transposed Convolution operator");
+
   arser.add_argument("--fuse_bcq")
       .nargs(0)
       .required(false)
@@ -101,7 +106,7 @@ int entry(int argc, char **argv)
       .nargs(0)
       .required(false)
       .default_value(false)
-      .help("This will turn off operator vaidations. May help input model investigation.");
+      .help("This will turn off operator validations. May help input model investigation.");
 
   arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
   arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
@@ -125,6 +130,8 @@ int entry(int argc, char **argv)
     options->enable(Algorithms::ResolveCustomOpBatchMatMul);
     options->enable(Algorithms::ResolveCustomOpMatMul);
   }
+  if (arser.get<bool>("--fuse_batchnorm_with_tconv"))
+    options->enable(Algorithms::FuseBatchNormWithTConv);
   if (arser.get<bool>("--fuse_bcq"))
     options->enable(Algorithms::FuseBCQ);
   if (arser.get<bool>("--fuse_instnorm"))
@@ -157,6 +164,14 @@ int entry(int argc, char **argv)
     std::cerr << err.what() << std::endl;
     return EXIT_FAILURE;
   }
+
+  flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()};
+  if (!circle::VerifyModelBuffer(verifier))
+  {
+    std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl;
+    return EXIT_FAILURE;
+  }
+
   const circle::Model *circle_model = circle::GetModel(model_data.data());
   if (circle_model == nullptr)
   {
@@ -177,15 +192,20 @@ int entry(int argc, char **argv)
 
     if (!luci::validate(graph))
     {
-      std::cerr << "ERROR: Optimized graph is invalid" << std::endl;
-      return 255;
+      if (settings->get(luci::UserSettings::Key::DisableValidation))
+        std::cerr << "WARNING: Optimized graph is invalid" << std::endl;
+      else
+      {
+        std::cerr << "ERROR: Optimized graph is invalid" << std::endl;
+        return 255;
+      }
     }
   }
 
   // Export to output Circle file
   luci::CircleExporter exporter;
 
-  CircleExpContract contract(module.get(), output_path);
+  luci::CircleFileExpContract contract(module.get(), output_path);
 
   if (!exporter.invoke(&contract))
   {
diff --git a/compiler/circlechef/circle/CMakeLists.txt b/compiler/circlechef/circle/CMakeLists.txt
index 75165ada3..2ca016b84 100644
--- a/compiler/circlechef/circle/CMakeLists.txt
+++ b/compiler/circlechef/circle/CMakeLists.txt
@@ -7,3 +7,4 @@ target_link_libraries(circlechef_circle circlechef_proto)
 target_link_libraries(circlechef_circle mio_circle)
 target_link_libraries(circlechef_circle stdex)
 target_link_libraries(circlechef_circle cwrap)
+target_link_libraries(circlechef_circle souschef)
diff --git a/compiler/circlechef/circle/src/CircleImport.h b/compiler/circlechef/circle/src/CircleImport.h
index a8ef3ee44..23ca29beb 100644
--- a/compiler/circlechef/circle/src/CircleImport.h
+++ b/compiler/circlechef/circle/src/CircleImport.h
@@ -19,6 +19,8 @@
 
 #include <mio/circle/schema_generated.h>
 
+#include <souschef/TensorFiller.h>
+
 #include <circlechef.pb.h>
 
 #include <map>
@@ -40,7 +42,7 @@ bool is_custom(const circle::OperatorCode *opcode);
 /**
  * @brief Loads TF lite file and provides helpers to access attributes
  */
-class CircleImport
+class CircleImport : public souschef::TensorFiller
 {
 public:
   CircleImport(const circle::Model *model);
@@ -63,63 +65,6 @@ public:
   std::string opcode_name(const circle::Operator *op) const;
   size_t buffer_info(const circle::Tensor *tensor, const uint8_t **buff_data);
 
-  /**
-   * @brief This will record the tensor by index, if it needs filler option,
-   *        such as kernel, bias.
-   */
-  void set_tensor_filler(uint32_t tensor_index) { _tensor_filler[tensor_index] = true; }
-
-  /**
-   * @brief This will store int32 filler values such as reshape information for the tensor
-   */
-  void set_tensor_filler(uint32_t tensor_index, std::vector<int32_t> &expvalues)
-  {
-    _tensor_filler_vint32[tensor_index] = expvalues;
-  }
-
-  void set_tensor_filler(uint32_t tensor_index, std::vector<float> &expvalues)
-  {
-    _tensor_filler_vfloat[tensor_index] = expvalues;
-  }
-
-  /**
-   * @brief This will return true if the tensor by index, needs a filler option.
-   */
-  bool get_tensor_filler(uint32_t tensor_index)
-  {
-    auto it = _tensor_filler.find(tensor_index);
-    if (it != _tensor_filler.end())
-    {
-      return it->second;
-    }
-    return false;
-  }
-
-  /**
-   * @brief This will return true if the tensor by index, needs a int array filler option.
-   */
-  bool get_tensor_filler(uint32_t tensor_index, std::vector<int32_t> &expvalues)
-  {
-    auto it = _tensor_filler_vint32.find(tensor_index);
-    if (it != _tensor_filler_vint32.end())
-    {
-      expvalues = it->second;
-      return true;
-    }
-    return false;
-  }
-
-  bool get_tensor_filler(uint32_t tensor_index, std::vector<float> &expvalues)
-  {
-    auto it = _tensor_filler_vfloat.find(tensor_index);
-    if (it != _tensor_filler_vfloat.end())
-    {
-      expvalues = it->second;
-      return true;
-    }
-    return false;
-  }
-
 private:
   const CircleSubGraphs_t *_subgraphs{nullptr};
   const CircleBuffers_t *_buffers{nullptr};
@@ -129,10 +74,6 @@ private:
   std::vector<const circle::OperatorCode *> _op_codes{};
   std::vector<int32_t> _inputs{};
   std::vector<int32_t> _outputs{};
-
-  std::map<uint32_t, bool> _tensor_filler{};
-  std::map<uint32_t, std::vector<int32_t>> _tensor_filler_vint32{};
-  std::map<uint32_t, std::vector<float>> _tensor_filler_vfloat{};
 };
 
 } // namespace circlechef
diff --git a/compiler/circlechef/core/src/ModelChef.cpp b/compiler/circlechef/core/src/ModelChef.cpp
index d81467d68..aa54678ec 100644
--- a/compiler/circlechef/core/src/ModelChef.cpp
+++ b/compiler/circlechef/core/src/ModelChef.cpp
@@ -26,6 +26,7 @@
 #include "OpChefs.h"
 
 #include <souschef/Dataset.h>
+#include <souschef/Dims.h>
 
 #include "Log.h"
 
@@ -41,52 +42,8 @@
 #include <sstream>
 #include <stdexcept>
 
-namespace
-{
-
 using namespace souschef;
 
-template <typename T> std::vector<T> as_vector(const ::google::protobuf::RepeatedPtrField<T> &field)
-{
-  std::vector<T> res;
-  for (const auto &elem : field)
-  {
-    res.emplace_back(elem);
-  }
-  return res;
-}
-
-template <typename T> Dataset<T> as_dataset(const ::google::protobuf::RepeatedPtrField<T> &field)
-{
-  return Dataset<T>(as_vector<T>(field));
-}
-
-} // namespace
-
-namespace
-{
-
-template <typename T> using Dims = std::vector<T>;
-
-Dims<int32_t> as_dims(const circlechef::TensorShape &shape)
-{
-  std::vector<int32_t> res;
-
-  for (auto &dim : shape.dim())
-  {
-    res.emplace_back(static_cast<int32_t>(dim));
-  }
-
-  return res;
-}
-
-int32_t element_count(const Dims<int32_t> &dims)
-{
-  return std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<int32_t>());
-}
-
-} // namespace
-
 namespace
 {
 
diff --git a/compiler/circledump/src/OpPrinter.cpp b/compiler/circledump/src/OpPrinter.cpp
index 3294bb23d..a0a063e79 100644
--- a/compiler/circledump/src/OpPrinter.cpp
+++ b/compiler/circledump/src/OpPrinter.cpp
@@ -725,6 +725,7 @@ OpPrinterRegistry::OpPrinterRegistry()
   _op_map[circle::BuiltinOperator_MIRROR_PAD] = make_unique<MirrorPadPrinter>();
   _op_map[circle::BuiltinOperator_MUL] = make_unique<MulPrinter>();
   // There is no Option for NON_MAX_SUPPRESSION_V4
+  // There is no Option for NON_MAX_SUPPRESSION_V5
   _op_map[circle::BuiltinOperator_ONE_HOT] = make_unique<OneHotPrinter>();
   _op_map[circle::BuiltinOperator_PACK] = make_unique<PackPrinter>();
   // There is no Option for PAD
diff --git a/compiler/common-artifacts/CMakeLists.txt b/compiler/common-artifacts/CMakeLists.txt
index ef50e8d43..ec9e3cf85 100644
--- a/compiler/common-artifacts/CMakeLists.txt
+++ b/compiler/common-artifacts/CMakeLists.txt
@@ -33,10 +33,12 @@ set(REQUIREMENTS_FILE "requirements.txt")
 set(REQUIREMENTS_OVERLAY_PATH_TF_1_13_2 "${VIRTUALENV_OVERLAY_TF_1_13_2}/${REQUIREMENTS_FILE}")
 set(REQUIREMENTS_OVERLAY_PATH_TF_2_3_0 "${VIRTUALENV_OVERLAY_TF_2_3_0}/${REQUIREMENTS_FILE}")
 
+# TODO remove version number of '--upgrade pip==20.2.1 setuptools==49.3.0'
+# NOTE adding version is for temporary hotfix of setuptools 50.x.y version
 add_custom_command(
   OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2}
   COMMAND ${CMAKE_COMMAND} -E echo "tensorflow==1.13.2" > ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2}
-  COMMAND ${VIRTUALENV_OVERLAY_TF_1_13_2}/bin/python -m pip --default-timeout=1000 install --upgrade pip setuptools
+  COMMAND ${VIRTUALENV_OVERLAY_TF_1_13_2}/bin/python -m pip --default-timeout=1000 install --upgrade pip==20.2.1 setuptools==49.3.0
   COMMAND ${VIRTUALENV_OVERLAY_TF_1_13_2}/bin/python -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2} --upgrade
   DEPENDS ${VIRTUALENV_OVERLAY_TF_1_13_2}
 )
@@ -46,7 +48,7 @@ add_custom_command(
   COMMAND ${CMAKE_COMMAND} -E remove -f ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
   COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.3.0" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
   COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==1.12" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
-  COMMAND ${VIRTUALENV_OVERLAY_TF_2_3_0}/bin/python -m pip --default-timeout=1000 install --upgrade pip setuptools
+  COMMAND ${VIRTUALENV_OVERLAY_TF_2_3_0}/bin/python -m pip --default-timeout=1000 install --upgrade pip==20.2.1 setuptools==49.3.0
   COMMAND ${VIRTUALENV_OVERLAY_TF_2_3_0}/bin/python -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0} --upgrade
   DEPENDS ${VIRTUALENV_OVERLAY_TF_2_3_0}
 )
@@ -233,10 +235,10 @@ foreach(RECIPE IN ITEMS ${RECIPES})
 
   set(INPUT_HDF5_FILE "${RECIPE}${OPT_FORMAT}.input.h5")
   set(INPUT_BIN_PATH "${CMAKE_CURRENT_BINARY_DIR}/${INPUT_HDF5_FILE}")
-  
+
   set(EXPECTED_HDF5_FILE "${RECIPE}${OPT_FORMAT}.expected.h5")
   set(EXPECTED_BIN_PATH "${CMAKE_CURRENT_BINARY_DIR}/${EXPECTED_HDF5_FILE}")
-  
+
   if(NOT DEFINED NO_TCGEN_${RECIPE})
     # Generate input.h5, expected.h5
     add_custom_command(OUTPUT ${INPUT_BIN_PATH} ${EXPECTED_BIN_PATH}
@@ -244,7 +246,7 @@ foreach(RECIPE IN ITEMS ${RECIPES})
       DEPENDS $<TARGET_FILE:testDataGenerator> ${MODEL_FILE}
       COMMENT "Generate ${INPUT_BIN_PATH} and ${EXPECTED_BIN_PATH}"
     )
-    
+
     # Generate test directory
     set(TC_DIRECTORY "${NNPKG_PATH}/metadata/tc")
     add_custom_command(OUTPUT ${TC_DIRECTORY}
@@ -252,7 +254,7 @@ foreach(RECIPE IN ITEMS ${RECIPES})
       DEPENDS ${NNPKG_PATH}
       COMMENT "Generate ${RECIPE} nnpackage test directory"
     )
-    
+
     # Move input hdf5 file to test directory
     set(INPUT_NNPKG_PATH "${TC_DIRECTORY}/input.h5")
     add_custom_command(OUTPUT ${INPUT_NNPKG_PATH}
@@ -260,7 +262,7 @@ foreach(RECIPE IN ITEMS ${RECIPES})
       DEPENDS ${INPUT_BIN_PATH} ${TC_DIRECTORY}
       COMMENT "Move ${INPUT_HDF5_FILE} to nnpackage"
     )
-    
+
     # Move expected hdf5 file to test directory
     set(EXPECTED_NNPKG_PATH "${TC_DIRECTORY}/expected.h5")
     add_custom_command(OUTPUT ${EXPECTED_NNPKG_PATH}
diff --git a/compiler/common-artifacts/exclude.lst b/compiler/common-artifacts/exclude.lst
index fe9933ae0..886f607cf 100644
--- a/compiler/common-artifacts/exclude.lst
+++ b/compiler/common-artifacts/exclude.lst
@@ -96,6 +96,8 @@ tcgenerate(Mean_U8_000)
 tcgenerate(Minimum_000)
 tcgenerate(NonMaxSuppressionV4_000)
 tcgenerate(NonMaxSuppressionV4_001)
+tcgenerate(NonMaxSuppressionV5_000)
+tcgenerate(NonMaxSuppressionV5_001)
 tcgenerate(MirrorPad_000)
 tcgenerate(Mul_U8_000)
 tcgenerate(Neg_000)
diff --git a/compiler/locomotiv/src/Node/BiasAdd.cpp b/compiler/locomotiv/src/Node/BiasAdd.cpp
index dfe32ca92..b84fa7e3c 100644
--- a/compiler/locomotiv/src/Node/BiasAdd.cpp
+++ b/compiler/locomotiv/src/Node/BiasAdd.cpp
@@ -41,10 +41,12 @@ std::unique_ptr<NodeData> calc(const NodeData *input_data, const NodeData *bias_
 
 } // namespace
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::BiasAdd<loco::Domain::Tensor> *bias_add)
+using namespace locomotiv;
+
+void execute_node(loco::BiasAdd<loco::Domain::Tensor> *bias_add)
 {
   validate(bias_add, "BiasAdd is nullptr");
 
@@ -63,7 +65,7 @@ void NodeExecution::execute(loco::BiasAdd<loco::Domain::Tensor> *bias_add)
   annot_domain(bias_add, annot_domain(bias_add->value()));
 }
 
-void NodeExecution::execute(loco::BiasAdd<loco::Domain::Feature> *bias_add)
+void execute_node(loco::BiasAdd<loco::Domain::Feature> *bias_add)
 {
   validate(bias_add, "BiasAdd is nullptr");
 
@@ -82,7 +84,7 @@ void NodeExecution::execute(loco::BiasAdd<loco::Domain::Feature> *bias_add)
   annot_domain(bias_add, loco::Domain::Feature);
 }
 
-} // namespace locomotiv
+} // namespace
 
 namespace
 {
@@ -123,3 +125,18 @@ std::unique_ptr<NodeData> calc(const NodeData *input_data, const NodeData *bias_
 }
 
 } // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::BiasAdd<loco::Domain::Tensor> *bias_add)
+{
+  execute_node(bias_add);
+}
+
+void NodeExecution::execute(loco::BiasAdd<loco::Domain::Feature> *bias_add)
+{
+  execute_node(bias_add);
+}
+
+} // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/BiasEncode.cpp b/compiler/locomotiv/src/Node/BiasEncode.cpp
index c2f2b44c0..21f00a495 100644
--- a/compiler/locomotiv/src/Node/BiasEncode.cpp
+++ b/compiler/locomotiv/src/Node/BiasEncode.cpp
@@ -23,10 +23,12 @@
 #include <stdexcept>
 #include <cassert>
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::BiasEncode *bias_enc)
+using namespace locomotiv;
+
+void execute_node(loco::BiasEncode *bias_enc)
 {
   auto input_data = annot_data(bias_enc->input());
 
@@ -60,4 +62,11 @@ void NodeExecution::execute(loco::BiasEncode *bias_enc)
   annot_domain(bias_enc, loco::Domain::Bias);
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::BiasEncode *bias_enc) { execute_node(bias_enc); }
+
 } // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/ConstGen.cpp b/compiler/locomotiv/src/Node/ConstGen.cpp
index 0360b9fef..96ffbc257 100644
--- a/compiler/locomotiv/src/Node/ConstGen.cpp
+++ b/compiler/locomotiv/src/Node/ConstGen.cpp
@@ -53,10 +53,12 @@ inline uint32_t offset_by_index(const Shape &shape, const Index &index)
 
 } // namespace
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::ConstGen *constgen)
+using namespace locomotiv;
+
+void execute_node(loco::ConstGen *constgen)
 {
   uint32_t volume = 1;
 
@@ -113,4 +115,11 @@ void NodeExecution::execute(loco::ConstGen *constgen)
   annot_domain(constgen, loco::Domain::Tensor);
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::ConstGen *constgen) { execute_node(constgen); }
+
 } // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/Conv2D.cpp b/compiler/locomotiv/src/Node/Conv2D.cpp
index 2e4185574..cdf0dfd56 100644
--- a/compiler/locomotiv/src/Node/Conv2D.cpp
+++ b/compiler/locomotiv/src/Node/Conv2D.cpp
@@ -139,10 +139,12 @@ Buffer<RET_T> calc_conv2D(const loco::Conv2D *conv2d, const Buffer<IFM_T> *input
 
 } // namespace
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::Conv2D *conv2d)
+using namespace locomotiv;
+
+void execute_node(loco::Conv2D *conv2d)
 {
   auto ifm_data = annot_data(conv2d->ifm());
   auto ker_data = annot_data(conv2d->ker());
@@ -176,4 +178,11 @@ void NodeExecution::execute(loco::Conv2D *conv2d)
   annot_domain(conv2d, loco::Domain::Feature);
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::Conv2D *conv2d) { execute_node(conv2d); }
+
 } // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/DepthwiseConv2D.cpp b/compiler/locomotiv/src/Node/DepthwiseConv2D.cpp
index 92d5aa161..f39cd177e 100644
--- a/compiler/locomotiv/src/Node/DepthwiseConv2D.cpp
+++ b/compiler/locomotiv/src/Node/DepthwiseConv2D.cpp
@@ -143,10 +143,12 @@ Buffer<RET_T> calc_dw_conv2d(const loco::DepthwiseConv2D *dw_conv2d, const Buffe
 
 } // namespace
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::DepthwiseConv2D *dw_conv2d)
+using namespace locomotiv;
+
+void execute_node(loco::DepthwiseConv2D *dw_conv2d)
 {
   auto ifm_data = annot_data(dw_conv2d->ifm());
   auto ker_data = annot_data(dw_conv2d->ker());
@@ -182,4 +184,11 @@ void NodeExecution::execute(loco::DepthwiseConv2D *dw_conv2d)
   annot_domain(dw_conv2d, loco::Domain::Feature);
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::DepthwiseConv2D *dw_conv2d) { execute_node(dw_conv2d); }
+
 } // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/DepthwiseFilterEncode.cpp b/compiler/locomotiv/src/Node/DepthwiseFilterEncode.cpp
index 17004901f..03f5bf833 100644
--- a/compiler/locomotiv/src/Node/DepthwiseFilterEncode.cpp
+++ b/compiler/locomotiv/src/Node/DepthwiseFilterEncode.cpp
@@ -79,10 +79,12 @@ std::unique_ptr<locomotiv::NodeData> dw_filter_encode(const loco::DepthwiseFilte
 
 } // namespace
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::DepthwiseFilterEncode *enc)
+using namespace locomotiv;
+
+void execute_node(loco::DepthwiseFilterEncode *enc)
 {
   auto input_data = annot_data(enc->input());
 
@@ -110,4 +112,11 @@ void NodeExecution::execute(loco::DepthwiseFilterEncode *enc)
   annot_domain(enc, loco::Domain::DepthwiseFilter);
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::DepthwiseFilterEncode *enc) { execute_node(enc); }
+
 } // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/FeatureDecode.cpp b/compiler/locomotiv/src/Node/FeatureDecode.cpp
index 8a56a56b2..8776e1b42 100644
--- a/compiler/locomotiv/src/Node/FeatureDecode.cpp
+++ b/compiler/locomotiv/src/Node/FeatureDecode.cpp
@@ -72,10 +72,12 @@ std::unique_ptr<locomotiv::NodeData> feature_decode(const loco::FeatureDecode *n
 
 } // namespace
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::FeatureDecode *dec)
+using namespace locomotiv;
+
+void execute_node(loco::FeatureDecode *dec)
 {
   auto input_data = annot_data(dec->input());
 
@@ -109,4 +111,11 @@ void NodeExecution::execute(loco::FeatureDecode *dec)
   annot_domain(dec, loco::Domain::Tensor);
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::FeatureDecode *dec) { execute_node(dec); }
+
 } // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/FilterEncode.cpp b/compiler/locomotiv/src/Node/FilterEncode.cpp
index cd9d708dc..0e2ac918f 100644
--- a/compiler/locomotiv/src/Node/FilterEncode.cpp
+++ b/compiler/locomotiv/src/Node/FilterEncode.cpp
@@ -74,10 +74,12 @@ std::unique_ptr<locomotiv::NodeData> filter_encode(const loco::FilterEncode *nod
 
 } // namespace
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::FilterEncode *enc)
+using namespace locomotiv;
+
+void execute_node(loco::FilterEncode *enc)
 {
   auto input_data = annot_data(enc->input());
 
@@ -111,4 +113,11 @@ void NodeExecution::execute(loco::FilterEncode *enc)
   annot_domain(enc, loco::Domain::Filter);
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::FilterEncode *enc) { execute_node(enc); }
+
 } // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/Forward.cpp b/compiler/locomotiv/src/Node/Forward.cpp
index eb7d44a59..9095ecf00 100644
--- a/compiler/locomotiv/src/Node/Forward.cpp
+++ b/compiler/locomotiv/src/Node/Forward.cpp
@@ -23,10 +23,12 @@
 #include <stdexcept>
 #include <cassert>
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::Forward *forward)
+using namespace locomotiv;
+
+void execute_node(loco::Forward *forward)
 {
   auto input_data = annot_data(forward->input());
 
@@ -59,4 +61,11 @@ void NodeExecution::execute(loco::Forward *forward)
   annot_domain(forward, annot_domain(forward->input()));
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::Forward *forward) { execute_node(forward); }
+
 } // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/MatMul.cpp b/compiler/locomotiv/src/Node/MatMul.cpp
index 77b7315a9..e5d149ac5 100644
--- a/compiler/locomotiv/src/Node/MatMul.cpp
+++ b/compiler/locomotiv/src/Node/MatMul.cpp
@@ -82,10 +82,12 @@ template <typename T> Buffer<T> calc_mat_mul(const Buffer<T> *lhs_buf, const Buf
 
 } // namespace
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::MatMul *mat_mul)
+using namespace locomotiv;
+
+void execute_node(loco::MatMul *mat_mul)
 {
   auto lhs_data = annot_data(mat_mul->lhs());
   auto rhs_data = annot_data(mat_mul->rhs());
@@ -130,4 +132,11 @@ void NodeExecution::execute(loco::MatMul *mat_mul)
   annot_domain(mat_mul, loco::Domain::Matrix);
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::MatMul *mat_mul) { execute_node(mat_mul); }
+
 } // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/MatrixDecode.cpp b/compiler/locomotiv/src/Node/MatrixDecode.cpp
index c591676ae..0310015f1 100644
--- a/compiler/locomotiv/src/Node/MatrixDecode.cpp
+++ b/compiler/locomotiv/src/Node/MatrixDecode.cpp
@@ -68,10 +68,12 @@ std::unique_ptr<locomotiv::NodeData> matrix_decode(const loco::MatrixDecode *nod
 
 } // namespace
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::MatrixDecode *matrix_dec)
+using namespace locomotiv;
+
+void execute_node(loco::MatrixDecode *matrix_dec)
 {
   auto input_data = annot_data(matrix_dec->input());
 
@@ -106,4 +108,11 @@ void NodeExecution::execute(loco::MatrixDecode *matrix_dec)
   annot_domain(matrix_dec, loco::Domain::Tensor);
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::MatrixDecode *matrix_dec) { execute_node(matrix_dec); }
+
 } // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/MaxPool2D.cpp b/compiler/locomotiv/src/Node/MaxPool2D.cpp
index 5d92f89f5..8dce1cb1e 100644
--- a/compiler/locomotiv/src/Node/MaxPool2D.cpp
+++ b/compiler/locomotiv/src/Node/MaxPool2D.cpp
@@ -129,10 +129,12 @@ nncc::core::ADT::tensor::Buffer<T> maxPool2D(const loco::MaxPool2D *maxpool2d,
 
 } // namespace
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::MaxPool2D *maxpool2d)
+using namespace locomotiv;
+
+void execute_node(loco::MaxPool2D *maxpool2d)
 {
   auto ifm_data = annot_data(maxpool2d->ifm());
 
@@ -164,4 +166,11 @@ void NodeExecution::execute(loco::MaxPool2D *maxpool2d)
   annot_domain(maxpool2d, loco::Domain::Feature);
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::MaxPool2D *maxpool2d) { execute_node(maxpool2d); }
+
 } // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/Pull.cpp b/compiler/locomotiv/src/Node/Pull.cpp
index c482d8b04..fe5d7c2e1 100644
--- a/compiler/locomotiv/src/Node/Pull.cpp
+++ b/compiler/locomotiv/src/Node/Pull.cpp
@@ -24,10 +24,12 @@
 #include <cassert>
 #include <stdexcept>
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::Pull *pull)
+using namespace locomotiv;
+
+void execute_node(loco::Pull *pull)
 {
 // TODO Remove deprecated code
 #if 0
@@ -69,4 +71,11 @@ void NodeExecution::execute(loco::Pull *pull)
   annot_domain(pull, loco::Domain::Tensor);
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::Pull *pull) { execute_node(pull); }
+
 } // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/Push.cpp b/compiler/locomotiv/src/Node/Push.cpp
index fc5808b15..4e1c6c3b8 100644
--- a/compiler/locomotiv/src/Node/Push.cpp
+++ b/compiler/locomotiv/src/Node/Push.cpp
@@ -23,10 +23,12 @@
 #include <stdexcept>
 #include <cassert>
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::Push *push)
+using namespace locomotiv;
+
+void execute_node(loco::Push *push)
 {
   auto from_data = annot_data(push->from());
 
@@ -58,4 +60,11 @@ void NodeExecution::execute(loco::Push *push)
   annot_domain(push, loco::Domain::Tensor);
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::Push *push) { execute_node(push); }
+
 } // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/Reshape.cpp b/compiler/locomotiv/src/Node/Reshape.cpp
index ac1672024..a9c07bee1 100644
--- a/compiler/locomotiv/src/Node/Reshape.cpp
+++ b/compiler/locomotiv/src/Node/Reshape.cpp
@@ -36,10 +36,12 @@ using nncc::core::ADT::tensor::num_elements;
 #include <cstring>
 #include <vector>
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::Reshape<loco::ReshapeType::Fixed> *reshape)
+using namespace locomotiv;
+
+void execute_node(loco::Reshape<loco::ReshapeType::Fixed> *reshape)
 {
   auto input_data = annot_data(reshape->input());
 
@@ -87,4 +89,14 @@ void NodeExecution::execute(loco::Reshape<loco::ReshapeType::Fixed> *reshape)
   annot_domain(reshape, annot_domain(reshape->input()));
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::Reshape<loco::ReshapeType::Fixed> *reshape)
+{
+  execute_node(reshape);
+}
+
 } // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/Softmax.cpp b/compiler/locomotiv/src/Node/Softmax.cpp
index 352598b27..0018eb66f 100644
--- a/compiler/locomotiv/src/Node/Softmax.cpp
+++ b/compiler/locomotiv/src/Node/Softmax.cpp
@@ -65,10 +65,12 @@ Shape reduce_shape(const Shape &shape, uint32_t axis)
 
 } // namespace
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::TensorSoftmax *softmax)
+using namespace locomotiv;
+
+void execute_node(loco::TensorSoftmax *softmax)
 {
   auto input_data = annot_data(softmax->input());
 
@@ -119,4 +121,11 @@ void NodeExecution::execute(loco::TensorSoftmax *softmax)
   annot_domain(softmax, annot_domain(softmax->input()));
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::TensorSoftmax *softmax) { execute_node(softmax); }
+
 } // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/TensorBroadcast.cpp b/compiler/locomotiv/src/Node/TensorBroadcast.cpp
index 010ca6821..38e5a7aa9 100644
--- a/compiler/locomotiv/src/Node/TensorBroadcast.cpp
+++ b/compiler/locomotiv/src/Node/TensorBroadcast.cpp
@@ -34,10 +34,12 @@ using nncc::core::ADT::tensor::Shape;
 #include <cassert>
 #include <stdexcept>
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::TensorBroadcast *tensor_broadcast)
+using namespace locomotiv;
+
+void execute_node(loco::TensorBroadcast *tensor_broadcast)
 {
   auto input_data = annot_data(tensor_broadcast->input());
 
@@ -103,4 +105,14 @@ void NodeExecution::execute(loco::TensorBroadcast *tensor_broadcast)
   annot_domain(tensor_broadcast, loco::Domain::Tensor);
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::TensorBroadcast *tensor_broadcast)
+{
+  execute_node(tensor_broadcast);
+}
+
 } // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/TensorConcat.cpp b/compiler/locomotiv/src/Node/TensorConcat.cpp
index 3187a7f75..188bb635b 100644
--- a/compiler/locomotiv/src/Node/TensorConcat.cpp
+++ b/compiler/locomotiv/src/Node/TensorConcat.cpp
@@ -35,10 +35,12 @@ using nncc::core::ADT::tensor::Shape;
 #include <cassert>
 #include <stdexcept>
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::TensorConcat *tensor_concat)
+using namespace locomotiv;
+
+void execute_node(loco::TensorConcat *tensor_concat)
 {
   validate(tensor_concat, "TensorConcat is nullptr");
 
@@ -112,4 +114,11 @@ void NodeExecution::execute(loco::TensorConcat *tensor_concat)
   annot_domain(tensor_concat, loco::Domain::Tensor);
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::TensorConcat *tensor_concat) { execute_node(tensor_concat); }
+
 } // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/TensorConstantPad.cpp b/compiler/locomotiv/src/Node/TensorConstantPad.cpp
index cd81a3a4d..5d4ad5d24 100644
--- a/compiler/locomotiv/src/Node/TensorConstantPad.cpp
+++ b/compiler/locomotiv/src/Node/TensorConstantPad.cpp
@@ -31,10 +31,12 @@ using nncc::core::ADT::tensor::IndexEnumerator;
 using nncc::core::ADT::tensor::LexicalLayout;
 using nncc::core::ADT::tensor::make_buffer;
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::TensorConstantPad *pad)
+using namespace locomotiv;
+
+void execute_node(loco::TensorConstantPad *pad)
 {
   validate(pad, "TensorConstantPad is nullptr");
 
@@ -112,4 +114,11 @@ void NodeExecution::execute(loco::TensorConstantPad *pad)
   annot_domain(pad, annot_domain(pad->input()));
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::TensorConstantPad *pad) { execute_node(pad); }
+
 } // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/TensorReduce.cpp b/compiler/locomotiv/src/Node/TensorReduce.cpp
index a60ebd890..1f619a31a 100644
--- a/compiler/locomotiv/src/Node/TensorReduce.cpp
+++ b/compiler/locomotiv/src/Node/TensorReduce.cpp
@@ -115,10 +115,12 @@ void apply(Buffer<T> &lhs, const Buffer<T> &rhs, const loco::TensorReduce &node)
 
 } // namespace
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::TensorReduce *node)
+using namespace locomotiv;
+
+void execute_node(loco::TensorReduce *node)
 {
   auto input_data = annot_data(node->input());
   validate(input_data, "Input not ready");
@@ -149,4 +151,11 @@ void NodeExecution::execute(loco::TensorReduce *node)
   annot_domain(node, annot_domain(node->input()));
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::TensorReduce *node) { execute_node(node); }
+
 } // namespace locomotiv
diff --git a/compiler/locomotiv/src/Node/TransposedConv2D.cpp b/compiler/locomotiv/src/Node/TransposedConv2D.cpp
index 3ea4f071d..bec15a5df 100644
--- a/compiler/locomotiv/src/Node/TransposedConv2D.cpp
+++ b/compiler/locomotiv/src/Node/TransposedConv2D.cpp
@@ -147,10 +147,12 @@ Buffer<RET_T> calc_tr_conv2D(const loco::TransposedConv2D *tr_conv2d,
 
 } // namespace
 
-namespace locomotiv
+namespace
 {
 
-void NodeExecution::execute(loco::TransposedConv2D *tr_conv2d)
+using namespace locomotiv;
+
+void execute_node(loco::TransposedConv2D *tr_conv2d)
 {
   auto ifm_data = annot_data(tr_conv2d->ifm());
   auto ker_data = annot_data(tr_conv2d->ker());
@@ -186,4 +188,11 @@ void NodeExecution::execute(loco::TransposedConv2D *tr_conv2d)
   annot_domain(tr_conv2d, loco::Domain::Feature);
 }
 
+} // namespace
+
+namespace locomotiv
+{
+
+void NodeExecution::execute(loco::TransposedConv2D *tr_conv2d) { execute_node(tr_conv2d); }
+
 } // namespace locomotiv
diff --git a/compiler/luci-interpreter/src/CMakeLists.txt b/compiler/luci-interpreter/src/CMakeLists.txt
index 6a66f1425..47b68fa40 100644
--- a/compiler/luci-interpreter/src/CMakeLists.txt
+++ b/compiler/luci-interpreter/src/CMakeLists.txt
@@ -1,6 +1,7 @@
-nnas_find_package(TensorFlowSource EXACT 2.1.0 QUIET)
-nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.1.0 QUIET)
-nnas_find_package(TensorFlowEigenSource EXACT 2.1.0 QUIET)
+nnas_find_package(TensorFlowSource EXACT 2.3.0 QUIET)
+nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.3.0 QUIET)
+nnas_find_package(TensorFlowEigenSource EXACT 2.3.0 QUIET)
+nnas_find_package(TensorFlowRuySource EXACT 2.3.0 QUIET)
 
 if (NOT TensorFlowSource_FOUND)
   message(STATUS "Skipping luci-interpreter: TensorFlow not found")
@@ -17,6 +18,11 @@ if (NOT TensorFlowEigenSource_FOUND)
   return()
 endif ()
 
+if (NOT TensorFlowRuySource_FOUND)
+  message(STATUS "Skipping luci-interpreter: Ruy not found")
+  return()
+endif ()
+
 add_subdirectory(core)
 add_subdirectory(kernels)
 add_subdirectory(loader)
diff --git a/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp b/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp
index 5ac3b2f7a..2ab7ff0da 100644
--- a/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp
@@ -93,6 +93,21 @@ TYPED_TEST(ArgMaxTest, MultiDimensions)
                             /*dimension_data=*/{3}, /*output_data=*/{3, 1});
 }
 
+TEST(ArgMaxTest, UnsupportedType_NEG)
+{
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4}, {
+                                                                             1, 2, 7, 8, 1, 9, 7, 3,
+                                                                         });
+  Tensor dimension_tensor = makeInputTensor<DataType::S32>({}, {3});
+  Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+  ArgMaxParams params{};
+  params.output_type = DataType::U8;
+  ArgMax kernel(&input_tensor, &dimension_tensor, &output_tensor, params);
+  kernel.configure();
+  EXPECT_ANY_THROW(kernel.execute());
+}
+
 } // namespace
 } // namespace kernels
 } // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp b/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp
index 6d1b8ead4..cdd81d7d6 100644
--- a/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp
+++ b/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp
@@ -35,6 +35,14 @@ AveragePool2D::AveragePool2D(const Tensor *input, Tensor *output, const Pool2DPa
 
 void AveragePool2D::configure()
 {
+  if (input()->element_type() != output()->element_type())
+  {
+    throw std::runtime_error("Input Tensor and Output Tensor Type must be same");
+  }
+  if (input()->shape().num_dims() != 4)
+  {
+    throw std::runtime_error("Input Tensor Shape must be 4-D");
+  }
   const Shape &input_shape = input()->shape();
 
   const int32_t batches = input_shape.dim(0);
@@ -51,7 +59,14 @@ void AveragePool2D::configure()
       computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height);
   _padding_width =
       computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width);
-
+  if (input()->element_type() == DataType::U8)
+  {
+    if (input()->scale() != output()->scale() || input()->zero_point() != output()->zero_point())
+    {
+      throw std::runtime_error(
+          "Quantization param for Input and output must be same(scale or zero-point)");
+    }
+  }
   output()->resize({batches, output_height, output_width, depth});
 }
 
diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp b/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp
index 7160e49e9..cc80e5e90 100644
--- a/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp
@@ -122,6 +122,80 @@ TEST(AveragePool2DTest, Uint8_1)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 1}));
 }
 
+TEST(AveragePool2DTest, Invalid_Input_Shape_NEG)
+{
+  Shape input_shape{1, 3, 5};
+  std::vector<float> input_data{
+      -4, -3, -2, -1, 0,  //
+      1,  2,  3,  4,  5,  //
+      6,  7,  8,  9,  10, //
+  };
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Pool2DParams params{};
+  params.padding = Padding::VALID;
+  params.filter_height = 2;
+  params.filter_width = 3;
+  params.stride_height = 1;
+  params.stride_width = 2;
+  params.activation = Activation::RELU6;
+
+  AveragePool2D kernel(&input_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(AveragePool2DTest, In_Out_Type_NEG)
+{
+  Shape input_shape{1, 3, 5, 1};
+  std::vector<float> input_data{
+      -4, -3, -2, -1, 0,  //
+      1,  2,  3,  4,  5,  //
+      6,  7,  8,  9,  10, //
+  };
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+  Pool2DParams params{};
+  params.padding = Padding::VALID;
+  params.filter_height = 2;
+  params.filter_width = 3;
+  params.stride_height = 1;
+  params.stride_width = 2;
+  params.activation = Activation::RELU6;
+
+  AveragePool2D kernel(&input_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(AveragePool2DTest, Quant_Param_NEG)
+{
+  std::pair<float, int32_t> quant_param1 = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
+  std::pair<float, int32_t> quant_param2 = quantizationParams<uint8_t>(-7.875f, 7.875f);
+  Tensor input_tensor{
+      DataType::U8, {1, 2, 4, 1}, {{quant_param1.first}, {quant_param1.second}}, ""};
+  Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param2.first, quant_param2.second);
+
+  std::vector<uint8_t> quant_input = quantize<uint8_t>(
+      {
+          0, -6, 12, 4,  //
+          -3, -2, 10, 7, //
+      },
+      quant_param1.first, quant_param1.second);
+  input_tensor.writeData(quant_input.data(), quant_input.size() * sizeof(uint8_t));
+
+  Pool2DParams params{};
+  params.padding = Padding::VALID;
+  params.filter_height = 2;
+  params.filter_width = 2;
+  params.stride_height = 2;
+  params.stride_width = 2;
+  params.activation = Activation::RELU6;
+
+  AveragePool2D kernel(&input_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
 } // namespace
 } // namespace kernels
 } // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/CMakeLists.txt b/compiler/luci-interpreter/src/kernels/CMakeLists.txt
index a1fd1deaf..040ac5911 100644
--- a/compiler/luci-interpreter/src/kernels/CMakeLists.txt
+++ b/compiler/luci-interpreter/src/kernels/CMakeLists.txt
@@ -44,6 +44,8 @@ set(SOURCES
     Reshape.cpp
     Reverse.h
     Reverse.cpp
+    Rsqrt.h
+    Rsqrt.cpp
     Slice.h
     Slice.cpp
     Softmax.h
@@ -54,8 +56,12 @@ set(SOURCES
     Split.cpp
     StridedSlice.h
     StridedSlice.cpp
+    Sqrt.h
+    Sqrt.cpp
     Squeeze.h
     Squeeze.cpp
+    Tanh.h
+    Tanh.cpp
     Transpose.h
     Transpose.cpp
     TransposeConv.h
@@ -63,12 +69,13 @@ set(SOURCES
     Unpack.h
     Unpack.cpp)
 
-list(APPEND SOURCES Utils.h Utils.cpp)
+list(APPEND SOURCES Utils.h Utils.cpp ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
 
 add_library(luci_interpreter_kernels STATIC ${SOURCES})
 set_target_properties(luci_interpreter_kernels PROPERTIES POSITION_INDEPENDENT_CODE ON)
 target_include_directories(luci_interpreter_kernels PUBLIC ${LUCI_INTERPRETER_SOURCE_DIR})
 target_include_directories(luci_interpreter_kernels SYSTEM PRIVATE
+    "${TensorFlowRuySource_DIR}"
     "${TensorFlowGEMMLowpSource_DIR}"
     "${TensorFlowEigenSource_DIR}"
     "${TensorFlowSource_DIR}")
@@ -99,12 +106,15 @@ set(TEST_SOURCES
     Pad.test.cpp
     Reshape.test.cpp
     Reverse.test.cpp
+    Rsqrt.test.cpp
     Slice.test.cpp
     Softmax.test.cpp
     SpaceToDepth.test.cpp
     Split.test.cpp
     StridedSlice.test.cpp
+    Sqrt.test.cpp
     Squeeze.test.cpp
+    Tanh.test.cpp
     Transpose.test.cpp
     TransposeConv.test.cpp
     Unpack.test.cpp)
diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.cpp b/compiler/luci-interpreter/src/kernels/Conv2D.cpp
index 60e6134ab..a51fb4afc 100644
--- a/compiler/luci-interpreter/src/kernels/Conv2D.cpp
+++ b/compiler/luci-interpreter/src/kernels/Conv2D.cpp
@@ -47,21 +47,21 @@ void Conv2D::configure()
   // We only support (1) and (3) for now.
   if (input()->element_type() == DataType::FLOAT32 && filter()->element_type() == DataType::FLOAT32)
   {
-    assert(bias() == nullptr || bias()->element_type() == DataType::FLOAT32);
+    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::FLOAT32);
   }
   else if (input()->element_type() == DataType::U8 && filter()->element_type() == DataType::U8)
   {
-    assert(bias() == nullptr || bias()->element_type() == DataType::S32);
+    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
   }
   else
   {
     throw std::runtime_error("Unsupported type.");
   }
-  assert(output()->element_type() == input()->element_type());
+  LUCI_INTERPRETER_CHECK(output()->element_type() == input()->element_type());
 
   const Shape &input_shape = input()->shape();
   const Shape &filter_shape = filter()->shape();
-  assert(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4);
+  LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4);
 
   const int32_t batches = input_shape.dim(0);
   const int32_t input_height = input_shape.dim(1);
@@ -69,10 +69,10 @@ void Conv2D::configure()
   const int32_t output_depth = filter_shape.dim(0);
   const int32_t filter_height = filter_shape.dim(1);
   const int32_t filter_width = filter_shape.dim(2);
-  assert(filter_shape.dim(3) == input_shape.dim(3));
+  LUCI_INTERPRETER_CHECK(filter_shape.dim(3) == input_shape.dim(3));
 
-  assert(bias() == nullptr ||
-         (bias()->shape().num_dims() == 1 && bias()->shape().dim(0) == output_depth));
+  LUCI_INTERPRETER_CHECK(bias() == nullptr || (bias()->shape().num_dims() == 1 &&
+                                               bias()->shape().dim(0) == output_depth));
 
   const int32_t output_height =
       computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp b/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp
index ef9ace903..0446d9760 100644
--- a/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp
@@ -180,6 +180,146 @@ TEST(Conv2DTest, Uint8)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
 }
 
+TEST(Conv2DTest, Unsupported_Type_Configure_NEG)
+{
+  Shape input_shape{1, 4, 3, 2};
+  Shape filter_shape{2, 2, 2, 2};
+  Shape bias_shape{2};
+  std::vector<int32_t> input_data{
+      1,  2,  3,  4,  5,  6,  // row = 0
+      7,  8,  9,  10, 11, 12, // row = 1
+      13, 14, 15, 16, 17, 18, // row = 2
+      19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> filter_data{
+      1,  2,  -3, -4, // out = 0, row = 0
+      -5, 6,  -7, 8,  // out = 1, row = 0
+      4,  -2, 3,  -1, // out = 0, row = 1
+      -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<float> bias_data{1, 2};
+  Tensor input_tensor = makeInputTensor<DataType::S32>(input_shape, input_data);
+  Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
+  Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(Conv2DTest, Invalid_Bias_Type_NEG)
+{
+  Shape input_shape{1, 4, 3, 2};
+  Shape filter_shape{2, 2, 2, 2};
+  Shape bias_shape{2};
+  std::vector<float> input_data{
+      1,  2,  3,  4,  5,  6,  // row = 0
+      7,  8,  9,  10, 11, 12, // row = 1
+      13, 14, 15, 16, 17, 18, // row = 2
+      19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> filter_data{
+      1,  2,  -3, -4, // out = 0, row = 0
+      -5, 6,  -7, 8,  // out = 1, row = 0
+      4,  -2, 3,  -1, // out = 0, row = 1
+      -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<uint8_t> bias_data{1, 2};
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
+  Tensor bias_tensor = makeInputTensor<DataType::U8>(bias_shape, bias_data);
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(Conv2DTest, Invalid_Bias_Data_NEG)
+{
+  Shape input_shape{1, 4, 3, 2};
+  Shape filter_shape{2, 2, 2, 2};
+  Shape bias_shape{3};
+  std::vector<float> input_data{
+      1,  2,  3,  4,  5,  6,  // row = 0
+      7,  8,  9,  10, 11, 12, // row = 1
+      13, 14, 15, 16, 17, 18, // row = 2
+      19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> filter_data{
+      1,  2,  -3, -4, // out = 0, row = 0
+      -5, 6,  -7, 8,  // out = 1, row = 0
+      4,  -2, 3,  -1, // out = 0, row = 1
+      -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<float> bias_data{1, 2, 3};
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
+  Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(Conv2DTest, Invalid_Input_Shape_NEG)
+{
+  Shape input_shape{1, 4, 6, 1};
+  Shape filter_shape{2, 2, 2, 2};
+  Shape bias_shape{2};
+  std::vector<float> input_data{
+      1,  2,  3,  4,  5,  6,  // row = 0
+      7,  8,  9,  10, 11, 12, // row = 1
+      13, 14, 15, 16, 17, 18, // row = 2
+      19, 20, 21, 22, 23, 24, // row = 3
+  };
+  std::vector<float> filter_data{
+      1,  2,  -3, -4, // out = 0, row = 0
+      -5, 6,  -7, 8,  // out = 1, row = 0
+      4,  -2, 3,  -1, // out = 0, row = 1
+      -8, -6, 7,  5,  // out = 1, row = 1
+  };
+  std::vector<float> bias_data{1, 2};
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
+  Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
 } // namespace
 } // namespace kernels
 } // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp b/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp
index fce01a605..1a26debe0 100644
--- a/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp
+++ b/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp
@@ -39,12 +39,10 @@ void LeakyRelu::configure()
   assert(input()->element_type() == output()->element_type());
   if (input()->element_type() == DataType::U8)
   {
-    _q_alpha = static_cast<uint8_t>(std::max<float>(
-        std::numeric_limits<uint8_t>::min(),
-        std::min<float>(std::numeric_limits<uint8_t>::max(),
-                        std::round(input()->zero_point() + (params().alpha / input()->scale())))));
-    double real_multiplier = input()->scale() * input()->scale() / output()->scale();
-    quantizeMultiplierSmallerThanOneExp(real_multiplier, &_output_multiplier, &_output_shift);
+    double alpha_multiplier = input()->scale() * params().alpha / output()->scale();
+    quantizeMultiplier(alpha_multiplier, &_output_multiplier_alpha, &_output_shift_alpha);
+    double identity_multiplier = input()->scale() / output()->scale();
+    quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity);
   }
   output()->resize(input()->shape());
 }
@@ -77,15 +75,15 @@ void LeakyRelu::evalQuantized() const
 {
   tflite::LeakyReluParams op_params{};
   op_params.input_offset = input()->zero_point();
-  op_params.alpha_offset = input()->zero_point();
   op_params.output_offset = output()->zero_point();
-
-  op_params.output_multiplier = _output_multiplier;
-  op_params.output_shift = _output_shift;
+  op_params.output_multiplier_alpha = _output_multiplier_alpha;
+  op_params.output_shift_alpha = _output_shift_alpha;
+  op_params.output_multiplier_identity = _output_multiplier_identity;
+  op_params.output_shift_identity = _output_shift_identity;
 
   tflite::reference_ops::QuantizeLeakyRelu(
-      op_params, _q_alpha, getTensorShape(input()), getTensorData<uint8_t>(input()),
-      getTensorShape(output()), getTensorData<uint8_t>(output()));
+      op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(output()),
+      getTensorData<uint8_t>(output()));
 }
 
 } // namespace kernels
diff --git a/compiler/luci-interpreter/src/kernels/LeakyRelu.h b/compiler/luci-interpreter/src/kernels/LeakyRelu.h
index dcc2be93f..e66f404df 100644
--- a/compiler/luci-interpreter/src/kernels/LeakyRelu.h
+++ b/compiler/luci-interpreter/src/kernels/LeakyRelu.h
@@ -41,9 +41,10 @@ private:
   void evalQuantized() const;
 
 private:
-  uint8_t _q_alpha = 0;
-  int32_t _output_multiplier = 0;
-  int _output_shift = 0;
+  int32_t _output_multiplier_alpha = 0;
+  int _output_shift_alpha = 0;
+  int32_t _output_multiplier_identity = 0;
+  int _output_shift_identity = 0;
 };
 
 } // namespace kernels
diff --git a/compiler/luci-interpreter/src/kernels/Mul.cpp b/compiler/luci-interpreter/src/kernels/Mul.cpp
index a6e721a09..dd31aa099 100644
--- a/compiler/luci-interpreter/src/kernels/Mul.cpp
+++ b/compiler/luci-interpreter/src/kernels/Mul.cpp
@@ -19,7 +19,8 @@
 
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
 
 #include <stdexcept>
 
@@ -66,13 +67,13 @@ void Mul::evalFloat() const
 
   if (need_broadcast)
   {
-    tflite::reference_ops::BroadcastMul4DSlow(
+    tflite::optimized_ops::BroadcastMul4DSlow(
         params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
         getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
   }
   else
   {
-    tflite::reference_ops::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()),
+    tflite::optimized_ops::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()),
                                getTensorShape(input2()), getTensorData<float>(input2()),
                                getTensorShape(output()), getTensorData<float>(output()));
   }
diff --git a/compiler/luci-interpreter/src/kernels/Rsqrt.cpp b/compiler/luci-interpreter/src/kernels/Rsqrt.cpp
new file mode 100644
index 000000000..6dd92dc98
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Rsqrt.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Rsqrt.h"
+#include "kernels/Utils.h"
+
+#include <stdexcept>
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Rsqrt::Rsqrt(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Rsqrt::configure()
+{
+  if (input()->element_type() != output()->element_type())
+  {
+    throw std::runtime_error("Input/output tensor data type mismatch.");
+  }
+  output()->resize(input()->shape());
+}
+
+void Rsqrt::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+void Rsqrt::evalFloat() const
+{
+  auto in = getTensorData<float>(input());
+  auto out = getTensorData<float>(output());
+  auto size = getTensorShape(input()).FlatSize();
+  for (auto i = in; i != in + size; ++i)
+  {
+    *out = 1.f / std::sqrt(*i);
+    ++out;
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Rsqrt.h b/compiler/luci-interpreter/src/kernels/Rsqrt.h
new file mode 100644
index 000000000..adc5bcfa2
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Rsqrt.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RSQRT_H
+#define LUCI_INTERPRETER_KERNELS_RSQRT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Rsqrt : public Kernel // element-wise reciprocal square root kernel
+{
+public:
+  Rsqrt(const Tensor *input, Tensor *output);
+
+  const Tensor *input() const { return _inputs[0]; } // the only input tensor
+  Tensor *output() const { return _outputs[0]; } // the only output tensor
+
+  void configure() override; // checks element types and resizes the output
+  void execute() const override; // dispatches on the input element type
+
+private:
+  void evalFloat() const; // FLOAT32 implementation
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RSQRT_H
diff --git a/compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp b/compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp
new file mode 100644
index 000000000..69b55d2f2
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Rsqrt.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Runs Rsqrt on a FLOAT32 tensor and compares the produced data and shape.
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+           std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+  Tensor in_tensor{DataType::FLOAT32, input_shape, {}, ""};
+  in_tensor.writeData(input_data.begin(), input_data.size() * sizeof(float));
+  Tensor out_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Rsqrt op(&in_tensor, &out_tensor);
+  op.configure();
+  op.execute();
+
+  const auto actual_data = extractTensorData<float>(out_tensor);
+  EXPECT_THAT(actual_data, ::testing::ElementsAreArray(ArrayFloatNear(output_data)));
+  EXPECT_THAT(extractTensorShape(out_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(RsqrtTest, SimpleRsqrt) // expected values are 1 / sqrt(input), element by element
+{
+  Check(
+      /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
+      /*input_data=*/
+      {
+          5, 4, 8, 2,     //
+          6, 7.5, 9, 0.3, //
+      },
+      /*output_data=*/
+      {
+          0.44721360, 0.5, 0.35355339, 0.70710678,       //
+          0.40824829, 0.36514837, 0.33333333, 1.8257419, //
+      });
+}
+
+TEST(RsqrtTest, Input_Output_Type_NEG)
+{
+  // A FLOAT32 input paired with an S32 output must be rejected by configure().
+  Tensor float_input = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+  Tensor int_output = makeOutputTensor(DataType::S32);
+  Rsqrt op(&float_input, &int_output);
+  EXPECT_ANY_THROW(op.configure());
+}
+
+TEST(RsqrtTest, Invalid_Input_Type_NEG)
+{
+  // S64 passes the type-match check in configure() but execute() has no S64 path.
+  Tensor s64_input = makeInputTensor<DataType::S64>({1}, {1});
+  Tensor s64_output = makeOutputTensor(DataType::S64);
+  Rsqrt op(&s64_input, &s64_output);
+  op.configure();
+  EXPECT_ANY_THROW(op.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Sqrt.cpp b/compiler/luci-interpreter/src/kernels/Sqrt.cpp
new file mode 100644
index 000000000..46e9fc9ad
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Sqrt.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Sqrt.h"
+#include "kernels/Utils.h"
+
+#include <stdexcept>
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Sqrt::Sqrt(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} // 1 in, 1 out
+
+// Rejects mismatched element types, then gives the output the input's shape.
+void Sqrt::configure()
+{
+  const bool types_match = input()->element_type() == output()->element_type();
+  if (!types_match)
+    throw std::runtime_error("Input/output tensor data type mismatch.");
+  output()->resize(input()->shape());
+}
+
+// Dispatches on the input element type; only FLOAT32 has an implementation.
+void Sqrt::execute() const
+{
+  const auto type = input()->element_type();
+  if (type != DataType::FLOAT32)
+  {
+    // Every other element type is rejected.
+    throw std::runtime_error("Unsupported type.");
+  }
+
+  evalFloat();
+}
+
+// Writes sqrt(x) for each input element into the output buffer.
+void Sqrt::evalFloat() const
+{
+  const float *src = getTensorData<float>(input());
+  float *dst = getTensorData<float>(output());
+  auto count = getTensorShape(input()).FlatSize();
+  for (decltype(count) idx = 0; idx < count; ++idx)
+  {
+    dst[idx] = std::sqrt(src[idx]);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Sqrt.h b/compiler/luci-interpreter/src/kernels/Sqrt.h
new file mode 100644
index 000000000..4034655ed
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Sqrt.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SQRT_H
+#define LUCI_INTERPRETER_KERNELS_SQRT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Sqrt : public Kernel // element-wise square root kernel
+{
+public:
+  Sqrt(const Tensor *input, Tensor *output);
+
+  const Tensor *input() const { return _inputs[0]; } // the only input tensor
+  Tensor *output() const { return _outputs[0]; } // the only output tensor
+
+  void configure() override; // checks element types and resizes the output
+  void execute() const override; // dispatches on the input element type
+
+private:
+  void evalFloat() const; // FLOAT32 implementation
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SQRT_H
diff --git a/compiler/luci-interpreter/src/kernels/Sqrt.test.cpp b/compiler/luci-interpreter/src/kernels/Sqrt.test.cpp
new file mode 100644
index 000000000..cdd208280
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Sqrt.test.cpp
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Sqrt.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Runs Sqrt on a FLOAT32 tensor and compares the produced data and shape.
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+           std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+  Tensor in_tensor{DataType::FLOAT32, input_shape, {}, ""};
+  in_tensor.writeData(input_data.begin(), input_data.size() * sizeof(float));
+  Tensor out_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Sqrt op(&in_tensor, &out_tensor);
+  op.configure();
+  op.execute();
+
+  const auto actual_data = extractTensorData<float>(out_tensor);
+  EXPECT_THAT(actual_data, ::testing::ElementsAreArray(ArrayFloatNear(output_data)));
+  EXPECT_THAT(extractTensorShape(out_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(SqrtTest, SimpleSqrt) // expected values are sqrt(input), element by element
+{
+  Check(
+      /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
+      /*input_data=*/
+      {
+          0, 8, 2, 4,    //
+          3, 7, 10, 0.3, //
+      },
+      /*output_data=*/
+      {
+          0.0, 2.8284271, 1.4142136, 2,                //
+          1.7320508, 2.6457513, 3.1622777, 0.54772256, //
+      });
+}
+
+TEST(SqrtTest, Input_Output_Type_NEG)
+{
+  // A FLOAT32 input paired with an S32 output must be rejected by configure().
+  Tensor float_input = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+  Tensor int_output = makeOutputTensor(DataType::S32);
+  Sqrt op(&float_input, &int_output);
+  EXPECT_ANY_THROW(op.configure());
+}
+
+TEST(SqrtTest, Invalid_Input_Type_NEG)
+{
+  // S64 passes the type-match check in configure() but execute() has no S64 path.
+  Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1});
+  Tensor output_tensor = makeOutputTensor(DataType::S64);
+  Sqrt kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  EXPECT_ANY_THROW(kernel.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Tanh.cpp b/compiler/luci-interpreter/src/kernels/Tanh.cpp
new file mode 100644
index 000000000..b649d5d2f
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Tanh.cpp
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Tanh.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Tanh::Tanh(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} // 1 in, 1 out
+
+void Tanh::configure()
+{
+  // Thrown (not asserted) so a type mismatch is also rejected in NDEBUG builds.
+  if (input()->element_type() != output()->element_type())
+    throw std::runtime_error("Input/output tensor data type mismatch.");
+  if (input()->element_type() == DataType::U8)
+    populateLookupTable(); // the U8 path is table-driven; build the table once here
+  output()->resize(input()->shape());
+}
+
+// Selects the float path or the table-driven uint8 path from the input type.
+void Tanh::execute() const
+{
+  const auto type = input()->element_type();
+  if (type == DataType::FLOAT32)
+  {
+    evalFloat();
+    return;
+  }
+  if (type != DataType::U8)
+    throw std::runtime_error("Unsupported type.");
+
+  evalQuantized();
+}
+
+void Tanh::evalFloat() const // delegates to the TFLite reference Tanh implementation
+{
+  tflite::reference_ops::Tanh(getTensorShape(input()), getTensorData<float>(input()),
+                              getTensorShape(output()), getTensorData<float>(output()));
+}
+
+// Maps each quantized input value through the lookup table.
+void Tanh::evalQuantized() const
+{
+  const int count = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
+  const uint8_t *src = getTensorData<uint8_t>(input());
+  uint8_t *dst = getTensorData<uint8_t>(output());
+
+  for (int k = 0; k < count; ++k)
+    dst[k] = getTableValue(src[k]);
+}
+
+void Tanh::populateLookupTable() // stores the quantized tanh of every uint8 value
+{
+  const auto input_scale = static_cast<double>(input()->scale());
+  const auto input_zero_point = static_cast<int32_t>(input()->zero_point());
+  const auto output_scale = static_cast<double>(output()->scale());
+  const auto output_zero_point = static_cast<int32_t>(output()->zero_point());
+  const float inverse_scale = 1 / output_scale; // double division, narrowed to float
+  int32_t maxval = std::numeric_limits<uint8_t>::max();
+  int32_t minval = std::numeric_limits<uint8_t>::min();
+  for (int32_t val = minval; val <= maxval; ++val) // a uint8_t counter would wrap, never exit
+  {
+    const float dequantized = input_scale * (val - input_zero_point); // quantized -> real
+    const float transformed = std::tanh(dequantized);
+    const float rescaled = std::round(transformed * inverse_scale); // real -> output scale
+    const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point);
+    setTableValue(static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval)),
+                  static_cast<uint8_t>(val)); // clamp to the uint8 range, store at index val
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Tanh.h b/compiler/luci-interpreter/src/kernels/Tanh.h
new file mode 100644
index 000000000..8017c9638
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Tanh.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_TANH_H
+#define LUCI_INTERPRETER_KERNELS_TANH_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Tanh kernel with a FLOAT32 path and a lookup-table based U8 path.
+class Tanh : public Kernel
+{
+public:
+  Tanh(const Tensor *input, Tensor *output);
+
+  const Tensor *input() const { return _inputs[0]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  void evalFloat() const;
+  void evalQuantized() const;
+  void populateLookupTable();
+  void setTableValue(uint8_t value, uint8_t idx) { _table[idx] = value; }
+  uint8_t getTableValue(uint8_t idx) const { return _table[idx]; }
+
+  uint8_t _table[256]{}; // one entry per possible uint8 input, zero-initialized
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_TANH_H
diff --git a/compiler/luci-interpreter/src/kernels/Tanh.test.cpp b/compiler/luci-interpreter/src/kernels/Tanh.test.cpp
new file mode 100644
index 000000000..392b8672d
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Tanh.test.cpp
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Tanh.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(TanhTest, Float) // FLOAT32 path compared against reference tanh values
+{
+  Shape input_shape{1, 2, 4, 1};
+  std::vector<float> input_data{
+      0, -6, 2,  4, //
+      3, -2, 10, 1, //
+  };
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Tanh kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  kernel.execute();
+
+  std::vector<float> ref_output_data{ // tanh of each input value, same order
+      0,          -0.9999877, 0.9640275, 0.999329,  //
+      0.99505475, -0.9640275, 1,         0.7615941, //
+  };
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              ElementsAreArray(ArrayFloatNear(ref_output_data)));
+}
+
+TEST(TanhTest, Uint8) // U8 path: quantize inputs, run the kernel, compare dequantized output
+{
+  float kMin = -1;
+  float kMax = 127.f / 128.f;
+  float kTanhTolerance = 2 * (1. / 256); // tolerance passed to ArrayFloatNear below
+  std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(8 * kMin, 8 * kMax);
+  std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(kMin, kMax);
+  std::vector<float> input_data{ // 48 values, matching the {2, 6, 4, 1} tensor below
+      0,  -6, 2, 4, //
+      -4, -2, 8, 1, //
+      0,  -6, 2, 4, //
+      -4, -2, 8, 1, //
+      0,  -6, 2, 4, //
+      -4, -2, 8, 1, //
+      0,  -6, 2, 4, //
+      -4, -2, 8, 1, //
+      0,  -6, 2, 4, //
+      -4, -2, 8, 1, //
+      0,  -6, 2, 4, //
+      -4, -2, 8, 1, //
+  };
+  Tensor input_tensor{
+      DataType::U8, {2, 6, 4, 1}, {{input_quant_param.first}, {input_quant_param.second}}, ""};
+  Tensor output_tensor =
+      makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+  std::vector<uint8_t> quantize_input =
+      quantize<uint8_t>(input_data, input_quant_param.first, input_quant_param.second);
+  input_tensor.writeData(quantize_input.data(), quantize_input.size() * sizeof(uint8_t));
+
+  Tanh kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  kernel.execute();
+
+  std::vector<float> ref_output_data{ // expected real-valued results, same order as input_data
+      0.0,       -0.999987, 0.964027, 0.999329, //
+      -0.999329, -0.96402,  0.99999,  0.76159,  //
+      0.0,       -0.999987, 0.964027, 0.999329, //
+      -0.999329, -0.96402,  0.99999,  0.76159,  //
+      0.0,       -0.999987, 0.964027, 0.999329, //
+      -0.999329, -0.96402,  0.99999,  0.76159,  //
+      0.0,       -0.999987, 0.964027, 0.999329, //
+      -0.999329, -0.96402,  0.99999,  0.76159,  //
+      0.0,       -0.999987, 0.964027, 0.999329, //
+      -0.999329, -0.96402,  0.99999,  0.76159,  //
+      0.0,       -0.999987, 0.964027, 0.999329, //
+      -0.999329, -0.96402,  0.99999,  0.76159,  //
+  };
+  std::vector<int32_t> ref_output_shape{2, 6, 4, 1};
+  EXPECT_THAT(dequantize<uint8_t>(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(),
+                                  output_tensor.zero_point()),
+              ElementsAreArray(ArrayFloatNear(ref_output_data, kTanhTolerance)));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/TransposeConv.cpp b/compiler/luci-interpreter/src/kernels/TransposeConv.cpp
index 46380e2fa..898bae3da 100644
--- a/compiler/luci-interpreter/src/kernels/TransposeConv.cpp
+++ b/compiler/luci-interpreter/src/kernels/TransposeConv.cpp
@@ -30,8 +30,8 @@ namespace kernels
 {
 
 TransposeConv::TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
-                             Tensor *output, const TransposeConvParams &params)
-    : KernelWithParams<TransposeConvParams>({output_shape, filter, input}, {output}, params)
+                             const Tensor *bias, Tensor *output, const TransposeConvParams &params)
+    : KernelWithParams<TransposeConvParams>({output_shape, filter, input, bias}, {output}, params)
 {
 }
 
@@ -106,8 +106,9 @@ void TransposeConv::evalFloat() const
   op_params.output_multiplier = _output_multiplier;
   tflite::reference_ops::TransposeConv(
       op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
-      getTensorData<float>(filter()), getTensorShape(output()), getTensorData<float>(output()),
-      tflite::RuntimeShape(), (float *)nullptr);
+      getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
+      getTensorShape(output()), getTensorData<float>(output()), tflite::RuntimeShape(),
+      (float *)nullptr);
 }
 
 void TransposeConv::evalQuantized() const
@@ -145,8 +146,9 @@ void TransposeConv::evalQuantized() const
 
   tflite::reference_ops::TransposeConv(
       op_params, getTensorShape(input()), getTensorData<uint8>(input()), getTensorShape(filter()),
-      getTensorData<uint8>(filter()), getTensorShape(output()), getTensorData<uint8>(output()),
-      tflite::RuntimeShape(), (uint8 *)nullptr, getTensorData<int32_t>(_scratch_tensor.get()));
+      getTensorData<uint8>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
+      getTensorShape(output()), getTensorData<uint8>(output()), tflite::RuntimeShape(),
+      (uint8 *)nullptr, getTensorData<int32_t>(_scratch_tensor.get()));
 }
 
 } // namespace kernels
diff --git a/compiler/luci-interpreter/src/kernels/TransposeConv.h b/compiler/luci-interpreter/src/kernels/TransposeConv.h
index d73e939b7..3a0eae761 100644
--- a/compiler/luci-interpreter/src/kernels/TransposeConv.h
+++ b/compiler/luci-interpreter/src/kernels/TransposeConv.h
@@ -29,11 +29,12 @@ class TransposeConv : public KernelWithParams<TransposeConvParams>
 {
 public:
   TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
-                Tensor *output, const TransposeConvParams &params);
+                const Tensor *bias, Tensor *output, const TransposeConvParams &params);
 
   const Tensor *output_shape() const { return _inputs[0]; }
   const Tensor *filter() const { return _inputs[1]; }
   const Tensor *input() const { return _inputs[2]; }
+  const Tensor *bias() const { return _inputs[3]; }
   Tensor *output() const { return _outputs[0]; }
 
   void configure() override;
diff --git a/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp b/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp
index b8c0ac497..0fbe9328b 100644
--- a/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp
@@ -26,15 +26,15 @@ namespace
 
 using namespace testing;
 
-template <typename T>
+template <typename T, typename B>
 void Check(std::initializer_list<int32_t> output_shape_shape,
            std::initializer_list<int32_t> weight_shape,
            std::initializer_list<int32_t> input_data_shape,
-           std::initializer_list<int32_t> output_shape,
+           std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
            std::initializer_list<int32_t> output_shape_data, std::initializer_list<T> weight_data,
-           std::initializer_list<T> input_data_data, std::initializer_list<T> output_data,
-           luci::Padding padding, int32_t stride_height, int32_t stride_width,
-           DataType element_type)
+           std::initializer_list<T> input_data_data, std::initializer_list<B> bias_data,
+           std::initializer_list<T> output_data, luci::Padding padding, int32_t stride_height,
+           int32_t stride_width, DataType element_type)
 {
   Tensor output_shape_tensor{element_type, output_shape_shape, {}, ""};
   output_shape_tensor.writeData(output_shape_data.begin(), output_shape_data.size() * sizeof(T));
@@ -50,21 +50,32 @@ void Check(std::initializer_list<int32_t> output_shape_shape,
   params.stride_height = stride_height;
   params.stride_width = stride_width;
 
-  TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, &output_tensor,
-                       params);
-  kernel.configure();
-  kernel.execute();
-
+  if (bias_data.size() != 0)
+  {
+    Tensor bias_tensor = makeInputTensor<getElementType<B>()>(bias_shape, bias_data);
+    TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, &bias_tensor,
+                         &output_tensor, params);
+    kernel.configure();
+    kernel.execute();
+  }
+  else
+  {
+    TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, nullptr,
+                         &output_tensor, params);
+    kernel.configure();
+    kernel.execute();
+  }
   EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
 }
 
 TEST(TransposeConvTest, FloatSimple)
 {
-  Check<float>(
+  Check<float, float>(
       /*outputShape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 1}, /*input_shape=*/{1, 4, 4, 1},
-      /*output_shape=*/{1, 4, 4, 1}, /*outputShape_data=*/{1, 4, 4, 1},
+      /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*outputShape_data=*/{1, 4, 4, 1},
       /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9},
       /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+      /*bias_data=*/{},
       /*output_data=*/{29, 62, 83, 75, 99, 192, 237, 198, 207, 372, 417, 330, 263, 446, 485, 365},
       /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1,
       getElementType<float>());
@@ -74,12 +85,13 @@ TEST(TransposeConvTest, FloatSimple)
 
 TEST(TransposeConvTest, FloatTwoFiltersTest)
 {
-  Check<float>(
+  Check<float, float>(
       /*outputShape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 2}, /*input_shape=*/{1, 4, 4, 2},
-      /*output_shape=*/{1, 4, 4, 1}, /*outputShape_data=*/{1, 4, 4, 1},
+      /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*outputShape_data=*/{1, 4, 4, 1},
       /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18},
       /*input_data=*/{1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32},
+      /*bias_data=*/{},
       /*output_data=*/{184, 412, 568, 528, 678, 1347, 1689, 1434, 1494, 2715, 3057, 2442, 1968,
                        3352, 3652, 2760},
       /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1,
@@ -88,6 +100,24 @@ TEST(TransposeConvTest, FloatTwoFiltersTest)
   SUCCEED();
 }
 
+TEST(TransposeConvTest, SimpleBiasTest)
+{
+  // Stride-2 VALID case with bias {3, 4}; the 50 expected values form a {1, 5, 5, 2} output.
+  Check<float, float>(
+      /*outputShape_shape=*/{4}, /*weight_shape=*/{2, 3, 3, 1},
+      /*input_shape=*/{1, 2, 2, 1},
+      /*bias_shape=*/{2}, /*output_shape=*/{1, 5, 5, 2}, /*outputShape_data=*/{1, 5, 5, 2},
+      /*weight_data=*/{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18},
+      /*input_data=*/{1, 2, 3, 4},
+      /*bias_data=*/{3, 4},
+      /*output_data=*/{4,  6,  6,  8,  10, 14, 9,  12, 13, 16, 10,  12,  12, 14, 28, 32, 21,
+                       24, 25, 28, 19, 24, 27, 32, 65, 76, 45, 52,  57,  64, 24, 28, 30, 34,
+                       64, 72, 39, 44, 47, 52, 42, 46, 48, 52, 106, 114, 63, 68, 71, 76},
+      /*params.padding=*/luci::Padding::VALID, /*stride_height=*/2, /*stride_width=*/2,
+      getElementType<float>());
+  SUCCEED();
+}
+
 // TODO Uint8Simple
 // Implement GetDequantizedOutput Function.
 // Create Test for Uint8 Case
diff --git a/compiler/luci-interpreter/src/kernels/Utils.h b/compiler/luci-interpreter/src/kernels/Utils.h
index 3c2cc8450..7927151c6 100644
--- a/compiler/luci-interpreter/src/kernels/Utils.h
+++ b/compiler/luci-interpreter/src/kernels/Utils.h
@@ -31,6 +31,11 @@ namespace luci_interpreter
 namespace kernels
 {
 
+#define LUCI_INTERPRETER_CHECK(cond) /* throws std::runtime_error unless cond holds */       \
+  if (!(cond))                                                                               \
+    throw std::runtime_error(std::string(__FILE__) + ":" + std::to_string(__LINE__) + "(" +  \
+                             std::string(#cond) + ") was not true.");
+
 inline int32_t computePadding(int32_t stride, int32_t dilation_rate, int32_t in_size,
                               int32_t filter_size, int32_t out_size)
 {
diff --git a/compiler/luci-interpreter/src/loader/KernelBuilder.cpp b/compiler/luci-interpreter/src/loader/KernelBuilder.cpp
index 12c7f4526..126a1cb5b 100644
--- a/compiler/luci-interpreter/src/loader/KernelBuilder.cpp
+++ b/compiler/luci-interpreter/src/loader/KernelBuilder.cpp
@@ -37,12 +37,15 @@
 #include "kernels/Pad.h"
 #include "kernels/Reshape.h"
 #include "kernels/Reverse.h"
+#include "kernels/Rsqrt.h"
 #include "kernels/Slice.h"
 #include "kernels/Softmax.h"
 #include "kernels/SpaceToDepth.h"
 #include "kernels/Split.h"
 #include "kernels/StridedSlice.h"
+#include "kernels/Sqrt.h"
 #include "kernels/Squeeze.h"
+#include "kernels/Tanh.h"
 #include "kernels/Unpack.h"
 #include "kernels/Transpose.h"
 #include "kernels/TransposeConv.h"
@@ -430,6 +433,16 @@ std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleReverseV2 *node)
   return std::make_unique<kernels::Reverse>(input, axes, output);
 }
 
+// Builds the Rsqrt kernel from the node's single operand `x`.
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleRsqrt *node)
+{
+  assert(node->arity() == 1);
+
+  const Tensor *const x_tensor = getInputTensor(node->x());
+  Tensor *const result_tensor = getOutputTensor(node);
+  return std::make_unique<kernels::Rsqrt>(x_tensor, result_tensor);
+}
+
 std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSlice *node)
 {
   assert(node->arity() == 3);
@@ -483,6 +496,16 @@ std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSplit *node)
   return std::make_unique<kernels::Split>(axis, input, std::move(outputs));
 }
 
+// Builds the Sqrt kernel from the node's single operand `x`.
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSqrt *node)
+{
+  assert(node->arity() == 1);
+
+  const Tensor *const x_tensor = getInputTensor(node->x());
+  Tensor *const result_tensor = getOutputTensor(node);
+  return std::make_unique<kernels::Sqrt>(x_tensor, result_tensor);
+}
+
 std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSqueeze *node)
 {
   assert(node->arity() == 1);
@@ -517,6 +540,16 @@ std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleStridedSlice *nod
   return std::make_unique<kernels::StridedSlice>(input, begin, end, strides, output, params);
 }
 
+// Builds the Tanh kernel from the node's single operand `x`.
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTanh *node)
+{
+  assert(node->arity() == 1);
+
+  const Tensor *const x_tensor = getInputTensor(node->x());
+  Tensor *const result_tensor = getOutputTensor(node);
+  return std::make_unique<kernels::Tanh>(x_tensor, result_tensor);
+}
+
 std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTranspose *node)
 {
   assert(node->arity() == 2);
@@ -530,11 +563,12 @@ std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTranspose *node)
 
 std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTransposeConv *node)
 {
-  assert(node->arity() == 3);
+  assert(node->arity() == 4);
 
   const Tensor *input_sizes = getInputTensor(node->inputSizes());
   const Tensor *filter = getInputTensor(node->filter());
   const Tensor *out_backprop = getInputTensor(node->outBackprop());
+  const Tensor *bias = getOptionalInputTensor(node->bias());
 
   Tensor *output = getOutputTensor(node);
 
@@ -543,7 +577,7 @@ std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTransposeConv *no
   params.stride_height = node->stride()->h();
   params.stride_width = node->stride()->w();
 
-  return std::make_unique<kernels::TransposeConv>(input_sizes, filter, out_backprop, output,
+  return std::make_unique<kernels::TransposeConv>(input_sizes, filter, out_backprop, bias, output,
                                                   params);
 }
 
diff --git a/compiler/luci-interpreter/src/loader/KernelBuilder.h b/compiler/luci-interpreter/src/loader/KernelBuilder.h
index d5c5a4b56..31cb9d8fc 100644
--- a/compiler/luci-interpreter/src/loader/KernelBuilder.h
+++ b/compiler/luci-interpreter/src/loader/KernelBuilder.h
@@ -63,12 +63,15 @@ public:
   std::unique_ptr<Kernel> visit(const luci::CirclePad *node) override;
   std::unique_ptr<Kernel> visit(const luci::CircleReshape *node) override;
   std::unique_ptr<Kernel> visit(const luci::CircleReverseV2 *node) override;
+  std::unique_ptr<Kernel> visit(const luci::CircleRsqrt *node) override;
   std::unique_ptr<Kernel> visit(const luci::CircleSlice *node) override;
   std::unique_ptr<Kernel> visit(const luci::CircleSoftmax *node) override;
   std::unique_ptr<Kernel> visit(const luci::CircleSpaceToDepth *node) override;
   std::unique_ptr<Kernel> visit(const luci::CircleSplit *node) override;
   std::unique_ptr<Kernel> visit(const luci::CircleStridedSlice *node) override;
+  std::unique_ptr<Kernel> visit(const luci::CircleSqrt *node) override;
   std::unique_ptr<Kernel> visit(const luci::CircleSqueeze *node) override;
+  std::unique_ptr<Kernel> visit(const luci::CircleTanh *node) override;
   std::unique_ptr<Kernel> visit(const luci::CircleTranspose *node) override;
   std::unique_ptr<Kernel> visit(const luci::CircleTransposeConv *node) override;
   std::unique_ptr<Kernel> visit(const luci::CircleUnpack *node) override;
diff --git a/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp b/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp
index 33bc8ec9b..4e2bc3d0b 100644
--- a/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp
+++ b/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp
@@ -37,12 +37,15 @@
 #include <kernels/Pad.h>
 #include <kernels/Reshape.h>
 #include <kernels/Reverse.h>
+#include <kernels/Rsqrt.h>
 #include <kernels/Slice.h>
 #include <kernels/Softmax.h>
 #include <kernels/SpaceToDepth.h>
 #include <kernels/Split.h>
+#include <kernels/Sqrt.h>
 #include <kernels/Squeeze.h>
 #include <kernels/StridedSlice.h>
+#include <kernels/Tanh.h>
 #include <kernels/Transpose.h>
 #include <kernels/TransposeConv.h>
 #include <kernels/Unpack.h>
@@ -529,6 +532,20 @@ TEST_F(KernelBuilderTest, ReverseV2)
   checkTensor(kernel->output(), op);
 }
 
+TEST_F(KernelBuilderTest, Rsqrt)
+{ // A CircleRsqrt node must yield a non-null kernels::Rsqrt wired to the expected tensors.
+  auto *input = createInputNode();
+
+  auto *op = createNode<luci::CircleRsqrt>();
+  op->x(input); // the input node is the op's only operand
+
+  auto kernel = buildKernel<kernels::Rsqrt>(op);
+  ASSERT_THAT(kernel, NotNull());
+
+  checkTensor(kernel->input(), input); // kernel input is checked against the input node
+  checkTensor(kernel->output(), op); // kernel output is checked against the op node
+}
+
 TEST_F(KernelBuilderTest, Slice)
 {
   auto *input = createInputNode();
@@ -605,6 +622,20 @@ TEST_F(KernelBuilderTest, Split)
   checkTensor(kernel->output(1), output2);
 }
 
+TEST_F(KernelBuilderTest, Sqrt)
+{ // A CircleSqrt node must yield a non-null kernels::Sqrt wired to the expected tensors.
+  auto *input = createInputNode();
+
+  auto *op = createNode<luci::CircleSqrt>();
+  op->x(input); // the input node is the op's only operand
+
+  auto kernel = buildKernel<kernels::Sqrt>(op);
+  ASSERT_THAT(kernel, NotNull());
+
+  checkTensor(kernel->input(), input); // kernel input is checked against the input node
+  checkTensor(kernel->output(), op); // kernel output is checked against the op node
+}
+
 TEST_F(KernelBuilderTest, Squeeze)
 {
   auto *input = createInputNode();
@@ -656,6 +687,20 @@ TEST_F(KernelBuilderTest, StridedSlice)
   EXPECT_THAT(kernel->params().shrink_axis_mask, Eq(op->shrink_axis_mask()));
 }
 
+TEST_F(KernelBuilderTest, Tanh)
+{ // A CircleTanh node must yield a non-null kernels::Tanh wired to the expected tensors.
+  auto *input = createInputNode();
+
+  auto *op = createNode<luci::CircleTanh>();
+  op->x(input); // the input node is the op's only operand
+
+  auto kernel = buildKernel<kernels::Tanh>(op);
+  ASSERT_THAT(kernel, NotNull());
+
+  checkTensor(kernel->input(), input); // kernel input is checked against the input node
+  checkTensor(kernel->output(), op); // kernel output is checked against the op node
+}
+
 TEST_F(KernelBuilderTest, Transpose)
 {
   auto *input = createInputNode();
@@ -678,11 +723,13 @@ TEST_F(KernelBuilderTest, TransposeConv)
   auto *output_shape = createInputNode();
   auto *filter = createInputNode();
   auto *input = createInputNode();
+  auto *bias = createInputNode();
 
   auto *op = createNode<luci::CircleTransposeConv>();
   op->inputSizes(output_shape);
   op->filter(filter);
   op->outBackprop(input);
+  op->bias(bias);
 
   op->padding(luci::Padding::SAME);
   op->stride()->h(11);
@@ -695,6 +742,7 @@ TEST_F(KernelBuilderTest, TransposeConv)
   checkTensor(kernel->filter(), filter);
   checkTensor(kernel->input(), input);
   checkTensor(kernel->output(), op);
+  checkTensor(kernel->bias(), bias);
   EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
   EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
   EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
diff --git a/compiler/luci-value-test/tester/CMakeLists.txt b/compiler/luci-value-test/tester/CMakeLists.txt
index f3b6dfcfe..f2a4ff4b6 100644
--- a/compiler/luci-value-test/tester/CMakeLists.txt
+++ b/compiler/luci-value-test/tester/CMakeLists.txt
@@ -1,8 +1,6 @@
 
 set(SRCS_EVAL_TESTER
       src/EvalTester.cpp
-      src/CircleExpContract.h
-      src/CircleExpContract.cpp
    )
 
 add_executable(luci_eval_tester ${SRCS_EVAL_TESTER})
diff --git a/compiler/luci-value-test/tester/src/CircleExpContract.h b/compiler/luci-value-test/tester/src/CircleExpContract.h
deleted file mode 100644
index 4d08fb89b..000000000
--- a/compiler/luci-value-test/tester/src/CircleExpContract.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_VALUE_TEST_CIRCLEXPCONTRACT_H__
-#define __LUCI_VALUE_TEST_CIRCLEXPCONTRACT_H__
-
-#include <loco.h>
-#include <luci/CircleExporter.h>
-#include <luci/IR/Module.h>
-
-#include <memory>
-#include <string>
-
-struct CircleExpContract : public luci::CircleExporter::Contract
-{
-public:
-  CircleExpContract(luci::Module *module, const std::string &filename)
-      : _module(module), _filepath(filename)
-  {
-    // NOTHING TO DO
-  }
-  virtual ~CircleExpContract() = default;
-
-public:
-  loco::Graph *graph(void) const final { return nullptr; }
-  luci::Module *module(void) const final { return _module; };
-
-public:
-  bool store(const char *ptr, const size_t size) const final;
-
-private:
-  luci::Module *_module;
-  const std::string _filepath;
-};
-
-#endif // __LUCI_VALUE_TEST_CIRCLEXPCONTRACT_H__
diff --git a/compiler/luci-value-test/tester/src/EvalTester.cpp b/compiler/luci-value-test/tester/src/EvalTester.cpp
index 09eef223a..b49602e5e 100644
--- a/compiler/luci-value-test/tester/src/EvalTester.cpp
+++ b/compiler/luci-value-test/tester/src/EvalTester.cpp
@@ -14,10 +14,10 @@
  * limitations under the License.
  */
 
-#include "CircleExpContract.h"
-
 #include <luci/Importer.h>
 #include <luci_interpreter/Interpreter.h>
+#include <luci/CircleExporter.h>
+#include <luci/CircleFileExpContract.h>
 
 #include <cstdlib>
 #include <fstream>
@@ -104,7 +104,9 @@ int entry(int argc, char **argv)
 
   // Export to a Circle file
   luci::CircleExporter exporter;
-  CircleExpContract contract(initial_module.get(), intermediate_filename);
+
+  luci::CircleFileExpContract contract(initial_module.get(), intermediate_filename);
+
   if (!exporter.invoke(&contract))
   {
     std::cerr << "ERROR: Failed to export '" << intermediate_filename << "'" << std::endl;
diff --git a/compiler/record-minmax/src/CircleExpContract.h b/compiler/luci/export/include/luci/CircleFileExpContract.h
index ab00fa860..eeaf2d9bb 100644
--- a/compiler/record-minmax/src/CircleExpContract.h
+++ b/compiler/luci/export/include/luci/CircleFileExpContract.h
@@ -14,40 +14,52 @@
  * limitations under the License.
  */
 
-#ifndef __RECORD_MINMAX_CIRCLEXPCONTRACT_H__
-#define __RECORD_MINMAX_CIRCLEXPCONTRACT_H__
+#ifndef __LUCI_CIRCLEFILEEXPCONTRACT_H__
+#define __LUCI_CIRCLEFILEEXPCONTRACT_H__
 
 #include <loco.h>
 #include <luci/CircleExporter.h>
 #include <luci/IR/Module.h>
+#include <oops/InternalExn.h>
 
 #include <string>
+#include <fstream>
+#include <iostream>
 
-namespace record_minmax
+namespace luci
 {
 
-struct CircleExpContract : public luci::CircleExporter::Contract
+struct CircleFileExpContract : public luci::CircleExporter::Contract // stores the export to a file
 {
 public:
-  CircleExpContract(luci::Module *module, const std::string &filename)
+  CircleFileExpContract(luci::Module *module, const std::string &filename) // filename: output path
       : _module(module), _filepath(filename)
   {
     // NOTHING TO DO
   }
-  virtual ~CircleExpContract() = default;
+  virtual ~CircleFileExpContract() = default;
 
 public:
   loco::Graph *graph(void) const final { return nullptr; }
-  luci::Module *module(void) const final { return _module; };
+  luci::Module *module(void) const final { return _module; }
 
 public:
-  bool store(const char *ptr, const size_t size) const final;
+  bool store(const char *ptr, const size_t size) const final // writes size bytes at ptr to _filepath
+  {
+    if (!ptr) // a null buffer is reported through INTERNAL_EXN
+      INTERNAL_EXN("Graph was not serialized by FlatBuffer for some reason");
+
+    std::ofstream fs(_filepath, std::ofstream::binary); // opened in binary mode
+    fs.write(ptr, size);
+
+    return fs.good(); // true only if the stream is not in an error state after the write
+  }
 
 private:
   luci::Module *_module;
   const std::string _filepath;
 };
 
-} // namespace record_minmax
+} // namespace luci
 
-#endif // __RECORD_MINMAX_CIRCLEXPCONTRACT_H__
+#endif // __LUCI_CIRCLEFILEEXPCONTRACT_H__
diff --git a/compiler/luci/export/src/CircleOperationExporter.cpp b/compiler/luci/export/src/CircleOperationExporter.cpp
index bca122050..36d61f6c9 100644
--- a/compiler/luci/export/src/CircleOperationExporter.cpp
+++ b/compiler/luci/export/src/CircleOperationExporter.cpp
@@ -38,12 +38,578 @@ namespace
 
 using namespace luci;
 
+struct ExportContext // references shared by the export helpers in this file
+{
+  FlatBufferBuilder &builder; // used to create vectors, option tables and operators
+  SerializedModelData &md; // used to register builtin and custom opcodes
+  SerializedGraphData &gd; // its _operators list receives every operator that is created
+};
+
+/**
+ * @brief Exports a 2D pooling node as MAX_POOL_2D, L2_POOL_2D or AVERAGE_POOL_2D
+ *
+ * @note  builtin_op must be one of those three operators and padding must be set (both asserted)
+ */
+template <class CirclePool2D>
+void export_pool_2d(ExportContext &ctx, CirclePool2D *node, circle::BuiltinOperator builtin_op)
+{
+  LUCI_ASSERT(builtin_op == circle::BuiltinOperator_MAX_POOL_2D ||
+                  builtin_op == circle::BuiltinOperator_L2_POOL_2D ||
+                  builtin_op == circle::BuiltinOperator_AVERAGE_POOL_2D,
+              "Should be L2Pool, MaxPool or AvgPool");
+  LUCI_ASSERT(node->padding() != luci::Padding::UNDEFINED, "Padding is not set");
+
+  uint32_t op_idx = ctx.md.registerBuiltinOpcode(builtin_op, node->op_version());
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->value())}; // single input: value()
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+
+  circle::Padding padding = getOpPadding(node->padding());
+
+  auto options = CreatePool2DOptions(ctx.builder, padding, node->stride()->w(), node->stride()->h(),
+                                     node->filter()->w(), node->filter()->h(), // w before h
+                                     to_circle_actfunc(node->fusedActivationFunction()));
+  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_Pool2DOptions, options.Union());
+  ctx.gd._operators.push_back(op_offset); // append to the graph's operator list
+}
+
+/**
+ * @brief export a node with the given options; inputs are all node arguments, one output
+ */
+void export_node(ExportContext &ctx, loco::Node *node, circle::BuiltinOperator bop,
+                 circle::BuiltinOptions bot, flatbuffers::Offset<void> options_offset)
+{
+  uint32_t op_idx = // op_version() is read after must_cast to luci::CircleNode
+      ctx.md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version());
+  std::vector<int32_t> inputs_vec;
+  std::vector<int32_t> outputs_vec{get_tensor_index(node)}; // the node itself is the only output
+  for (uint32_t i = 0; i < node->arity(); ++i) // inputs follow argument order
+    inputs_vec.push_back(get_tensor_index(node->arg(i)));
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, bot, options_offset);
+  ctx.gd._operators.push_back(op_offset); // append to the graph's operator list
+}
+
+/**
+ * @brief export a node without options; inputs are all node arguments, one output
+ */
+void export_node(ExportContext &ctx, loco::Node *node, circle::BuiltinOperator bop)
+{
+  uint32_t op_idx = // op_version() is read after must_cast to luci::CircleNode
+      ctx.md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version());
+  std::vector<int32_t> inputs_vec;
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  for (uint32_t i = 0; i < node->arity(); ++i) // inputs follow argument order
+    inputs_vec.push_back(get_tensor_index(node->arg(i)));
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs); // no options passed
+  ctx.gd._operators.push_back(op_offset); // append to the graph's operator list
+}
+
+void export_node(ExportContext &ctx, luci::CircleAddN *node)
+{ // Exports ADD_N: every input of the node becomes an operator input, with AddNOptions.
+  uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_ADD_N, node->op_version());
+  std::vector<int32_t> inputs_vec;
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+
+  for (uint32_t i = 0; i < node->arity(); ++i) // one tensor index per input, in order
+    inputs_vec.push_back(get_tensor_index(node->inputs(i)));
+
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+  auto options = CreateAddNOptions(ctx.builder);
+  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_AddNOptions, options.Union());
+  ctx.gd._operators.push_back(op_offset); // append to the graph's operator list
+}
+
+void export_node(ExportContext &ctx, luci::CircleCast *node)
+{ // Exports CAST; CastOptions are written only when the node's out_data_type is known.
+  uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_CAST, node->op_version());
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->x())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+
+  flatbuffers::Offset<Operator> op_offset;
+  if (node->out_data_type() != loco::DataType::Unknown)
+  {
+    auto options = CreateCastOptions(ctx.builder, to_circle_tensortype(node->in_data_type()),
+                                     to_circle_tensortype(node->out_data_type()));
+    op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+                               circle::BuiltinOptions_CastOptions, options.Union());
+  }
+  else
+  {
+    op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs); // operator without options
+  }
+  ctx.gd._operators.push_back(op_offset); // append to the graph's operator list
+}
+
+void export_node(ExportContext &ctx, luci::CircleConcatenation *node)
+{ // Exports CONCATENATION with all values() as inputs plus axis and fused activation options.
+  uint32_t op_idx =
+      ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_CONCATENATION, node->op_version());
+  std::vector<int32_t> inputs_vec;
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+
+  for (uint32_t i = 0; i < node->numValues(); ++i) // one tensor index per value, in order
+    inputs_vec.push_back(get_tensor_index(node->values(i)));
+
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+  auto options = CreateConcatenationOptions(ctx.builder, node->axis(),
+                                            to_circle_actfunc(node->fusedActivationFunction()));
+  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_ConcatenationOptions, options.Union());
+  ctx.gd._operators.push_back(op_offset); // append to the graph's operator list
+}
+
+void export_node(ExportContext &ctx, luci::CircleCustom *node)
+{ // Exports a custom op: outputs are the node's successors ordered by index(), options are raw bytes.
+  auto custom_outputs = loco::succs(node);
+
+  uint32_t op_idx = ctx.md.registerCustomOpcode(node->custom_code());
+  std::vector<int32_t> inputs_vec;
+  std::vector<int32_t> outputs_vec;
+
+  for (uint32_t index = 0; index < node->numInputs(); index++)
+  {
+    inputs_vec.push_back(get_tensor_index(node->inputs(index)));
+  }
+  for (uint32_t index = 0; index < custom_outputs.size(); index++)
+  {
+    // store in order of index
+    bool found = false;
+    for (auto out : custom_outputs)
+    {
+      auto custom_out = loco::must_cast<luci::CircleCustomOut *>(out);
+      if (custom_out->index() == static_cast<int32_t>(index))
+      {
+        outputs_vec.push_back(get_tensor_index(custom_out));
+        found = true;
+        break;
+      }
+    }
+    if (!found) // no successor carries this index
+    {
+      INTERNAL_EXN("Invalid Custom output");
+    }
+  }
+
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+  flatbuffers::Offset<flatbuffers::Vector<uint8_t>> circle_custom_options;
+  std::vector<uint8_t> custom_options_vec{node->custom_options().begin(),
+                                          node->custom_options().end()}; // copy of the option bytes
+  circle_custom_options = ctx.builder.CreateVector(custom_options_vec);
+  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, circle::BuiltinOptions_NONE,
+                                  flatbuffers::Offset<void>(), circle_custom_options);
+  ctx.gd._operators.push_back(op_offset); // append to the graph's operator list
+}
+
+void export_node(ExportContext &ctx, luci::CircleIf *node)
+{ // Exports IF: inputs are cond followed by input(i); outputs are CircleIfOut successors by index().
+  auto if_outs = loco::succs(node);
+  assert(if_outs.size() == node->output_count());
+
+  uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_IF, node->op_version());
+  std::vector<int32_t> inputs_vec;
+  std::vector<int32_t> outputs_vec;
+
+  inputs_vec.push_back(get_tensor_index(node->cond())); // condition goes first
+  for (uint32_t idx = 0; idx < node->input_count(); ++idx)
+    inputs_vec.push_back(get_tensor_index(node->input(idx)));
+
+  for (uint32_t idx = 0; idx < node->output_count(); ++idx)
+  {
+    // store in order of index
+    bool found = false;
+    for (auto out : if_outs)
+    {
+      auto if_out = loco::must_cast<luci::CircleIfOut *>(out);
+      if (if_out->index() == static_cast<int32_t>(idx))
+      {
+        outputs_vec.push_back(get_tensor_index(if_out));
+        found = true;
+        break;
+      }
+    }
+    if (!found) // no successor carries this index
+    {
+      INTERNAL_EXN("Invalid CircleIf output");
+    }
+  }
+
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+  auto options = CreateIfOptions(ctx.builder, node->then_branch(), node->else_branch());
+  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_IfOptions, options.Union());
+  ctx.gd._operators.push_back(op_offset); // append to the graph's operator list
+}
+
+void export_node(ExportContext &ctx, luci::CircleNonMaxSuppressionV4 *node)
+{ // Exports NON_MAX_SUPPRESSION_V4 with five inputs and its Out successors ordered by index().
+  auto nms_outs = loco::succs(node);
+  assert(nms_outs.size() == 2); // exactly two output nodes are expected
+
+  uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_NON_MAX_SUPPRESSION_V4,
+                                                 node->op_version());
+  std::vector<int32_t> inputs_vec{
+      get_tensor_index(node->boxes()),           get_tensor_index(node->scores()),
+      get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()),
+      get_tensor_index(node->score_threshold()),
+  };
+  std::vector<int32_t> outputs_vec;
+
+  for (uint32_t idx = 0; idx < nms_outs.size(); ++idx)
+  {
+    // store in order of index
+    bool found = false;
+    for (auto out : nms_outs)
+    {
+      auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV4Out *>(out);
+      if (nms_out->index() == static_cast<int32_t>(idx))
+      {
+        outputs_vec.push_back(get_tensor_index(nms_out));
+        found = true;
+        break;
+      }
+    }
+    if (!found) // no successor carries this index
+    {
+      INTERNAL_EXN("Invalid NonMaxSuppressionV4 output");
+    }
+  }
+
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+  auto options = CreateNonMaxSuppressionV4Options(ctx.builder);
+  auto op_offset =
+      CreateOperator(ctx.builder, op_idx, inputs, outputs,
+                     circle::BuiltinOptions_NonMaxSuppressionV4Options, options.Union());
+  ctx.gd._operators.push_back(op_offset); // append to the graph's operator list
+}
+
+void export_node(ExportContext &ctx, luci::CircleNonMaxSuppressionV5 *node)
+{ // Exports NON_MAX_SUPPRESSION_V5 with six inputs and its Out successors ordered by index().
+  auto nms_outs = loco::succs(node);
+  assert(nms_outs.size() == 3); // exactly three output nodes are expected
+
+  uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_NON_MAX_SUPPRESSION_V5,
+                                                 node->op_version());
+  std::vector<int32_t> inputs_vec{
+      get_tensor_index(node->boxes()),           get_tensor_index(node->scores()),
+      get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()),
+      get_tensor_index(node->score_threshold()), get_tensor_index(node->soft_nms_sigma()),
+  };
+  std::vector<int32_t> outputs_vec;
+
+  for (uint32_t idx = 0; idx < nms_outs.size(); ++idx)
+  {
+    // store in order of index
+    bool found = false;
+    for (auto out : nms_outs)
+    {
+      auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV5Out *>(out);
+      if (nms_out->index() == static_cast<int32_t>(idx))
+      {
+        outputs_vec.push_back(get_tensor_index(nms_out));
+        found = true;
+        break;
+      }
+    }
+    if (!found) // no successor carries this index
+    {
+      INTERNAL_EXN("Invalid NonMaxSuppressionV5 output");
+    }
+  }
+
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+  auto options = CreateNonMaxSuppressionV5Options(ctx.builder);
+  auto op_offset =
+      CreateOperator(ctx.builder, op_idx, inputs, outputs,
+                     circle::BuiltinOptions_NonMaxSuppressionV5Options, options.Union());
+  ctx.gd._operators.push_back(op_offset); // append to the graph's operator list
+}
+
+void export_node(ExportContext &ctx, luci::CircleReverseV2 *node)
+{ // Exports REVERSE_V2 with inputs (tensor, axis), one output, and a ReverseV2Options table.
+  uint32_t op_idx =
+      ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_REVERSE_V2, node->op_version());
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->tensor()), get_tensor_index(node->axis())};
+  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+  auto options = CreateReverseV2Options(ctx.builder); // the union tag below must name this table
+  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_ReverseV2Options, options.Union());
+  ctx.gd._operators.push_back(op_offset); // append to the graph's operator list
+}
+
+void export_node(ExportContext &ctx, luci::CircleSplit *node)
+{ // Exports SPLIT: inputs are (split_dim, input); outputs are CircleSplitOut successors by index().
+  auto split_outs = loco::succs(node);
+  assert(int32_t(split_outs.size()) == node->num_split());
+
+  uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_SPLIT, node->op_version());
+  // NOTE BuiltinOperator_SPLIT input is placed at second position
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->split_dim()),
+                                  get_tensor_index(node->input())};
+  std::vector<int32_t> outputs_vec;
+
+  for (int32_t index = 0; index < node->num_split(); index++)
+  {
+    // store in order of index
+    bool found = false;
+    for (auto out : split_outs)
+    {
+      auto split_out = loco::must_cast<luci::CircleSplitOut *>(out);
+      if (split_out->index() == index)
+      {
+        outputs_vec.push_back(get_tensor_index(split_out));
+        found = true;
+        break;
+      }
+    }
+    if (!found) // no successor carries this index
+    {
+      INTERNAL_EXN("Invalid Split output");
+    }
+  }
+
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+  auto options = CreateSplitOptions(ctx.builder, node->num_split());
+  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_SplitOptions, options.Union());
+  ctx.gd._operators.push_back(op_offset); // append to the graph's operator list
+}
+
+void export_node(ExportContext &ctx, luci::CircleSplitV *node)
+{ // Exports SPLIT_V: inputs are (input, size_splits, split_dim); outputs are ordered by index().
+  auto split_outs = loco::succs(node);
+  assert(int32_t(split_outs.size()) == node->num_split());
+
+  uint32_t op_idx =
+      ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_SPLIT_V, node->op_version());
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->input()),
+                                  get_tensor_index(node->size_splits()),
+                                  get_tensor_index(node->split_dim())};
+  std::vector<int32_t> outputs_vec;
+
+  for (int32_t index = 0; index < node->num_split(); index++)
+  {
+    // store in order of index
+    bool found = false;
+    for (auto out : split_outs)
+    {
+      auto split_out = loco::must_cast<luci::CircleSplitVOut *>(out);
+      if (split_out->index() == index)
+      {
+        outputs_vec.push_back(get_tensor_index(split_out));
+        found = true;
+        break;
+      }
+    }
+    if (!found) // no successor carries this index
+    {
+      INTERNAL_EXN("Invalid SplitV output");
+    }
+  }
+
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+  auto options = CreateSplitVOptions(ctx.builder, node->num_split());
+  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_SplitVOptions, options.Union());
+  ctx.gd._operators.push_back(op_offset); // append to the graph's operator list
+}
+
+void export_node(ExportContext &ctx, luci::CircleTopKV2 *node)
+{ // Exports TOPK_V2: inputs are (input, k); outputs are CircleTopKV2Out successors by index().
+  auto topkv2_outs = loco::succs(node);
+  int outs_count = int32_t(topkv2_outs.size());
+  assert(outs_count == 2); // exactly two output nodes are expected
+
+  uint32_t op_idx =
+      ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_TOPK_V2, node->op_version());
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), get_tensor_index(node->k())};
+  std::vector<int32_t> outputs_vec;
+
+  for (int32_t index = 0; index < outs_count; index++)
+  {
+    // store in order of index
+    bool found = false;
+    for (auto out : topkv2_outs)
+    {
+      auto topkv2_out = loco::must_cast<luci::CircleTopKV2Out *>(out);
+      if (topkv2_out->index() == index)
+      {
+        outputs_vec.push_back(get_tensor_index(topkv2_out));
+        found = true;
+        break;
+      }
+    }
+    if (!found) // no successor carries this index
+    {
+      INTERNAL_EXN("Invalid TopKV2 output");
+    }
+  }
+
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+  auto options = CreateTopKV2Options(ctx.builder);
+  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_TopKV2Options, options.Union());
+  ctx.gd._operators.push_back(op_offset); // append to the graph's operator list
+}
+
+void export_node(ExportContext &ctx, luci::CircleUnique *node)
+{ // Exports UNIQUE: one input; the two CircleUniqueOut successors are emitted in index() order.
+  auto unique_outs = loco::succs(node);
+  assert(int32_t(unique_outs.size()) == 2); // exactly two output nodes are expected
+  uint32_t op_idx =
+      ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_UNIQUE, node->op_version());
+
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->input())};
+  std::vector<int32_t> outputs_vec;
+
+  for (int32_t index = 0; index < 2; index++)
+  {
+    // store in order of index
+    bool found = false;
+    for (auto out : unique_outs)
+    {
+      auto unique_out = loco::must_cast<luci::CircleUniqueOut *>(out);
+      if (unique_out->index() == index)
+      {
+        outputs_vec.push_back(get_tensor_index(unique_out));
+        found = true;
+        break;
+      }
+    }
+    if (!found) // no successor carries this index
+    {
+      INTERNAL_EXN("Invalid Unique output");
+    }
+  }
+
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+  auto options = CreateUniqueOptions(ctx.builder, to_circle_tensortype(node->idx_out_type()));
+  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_UniqueOptions, options.Union());
+  ctx.gd._operators.push_back(op_offset); // append to the graph's operator list
+}
+
+void export_node(ExportContext &ctx, luci::CircleUnpack *node)
+{ // Exports UNPACK; a count mismatch or missing output only warns when DisableValidation is set.
+  LOGGER(l);
+  auto settings = luci::UserSettings::settings();
+
+  auto unpack_outs = loco::succs(node);
+  // NOTE real models may not use all of the outputs
+  if (static_cast<int32_t>(unpack_outs.size()) != node->num())
+  {
+    if (settings->get(luci::UserSettings::Key::DisableValidation))
+    {
+      WARN(l) << "Warning: export Unpack(" << node->name() << ") 'num' not same as outputs";
+    }
+    else
+      assert(false); // NOTE(review): compiled out under NDEBUG, so export then continues - confirm
+  }
+
+  uint32_t op_idx =
+      ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_UNPACK, node->op_version());
+  std::vector<int32_t> inputs_vec{get_tensor_index(node->value())};
+  std::vector<int32_t> outputs_vec;
+
+  for (int32_t index = 0; index < node->num(); index++)
+  {
+    // store in order of index
+    bool found = false;
+    for (auto out : unpack_outs)
+    {
+      auto unpack_out = loco::must_cast<luci::CircleUnpackOut *>(out);
+      if (unpack_out->index() == index)
+      {
+        outputs_vec.push_back(get_tensor_index(unpack_out));
+        found = true;
+        break;
+      }
+    }
+    // NOTE real models may not use all of the outputs
+    if (!found) // nothing is pushed for this index, so outputs_vec may end up shorter than num()
+    {
+      if (settings->get(luci::UserSettings::Key::DisableValidation))
+      {
+        WARN(l) << "Warning: export Unpack(" << node->name() << ") output " << index << " not used";
+      }
+      else
+        assert(false); // NOTE(review): compiled out under NDEBUG, so export then continues - confirm
+    }
+  }
+
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+  auto options = CreateUnpackOptions(ctx.builder, node->num(), node->axis());
+  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_UnpackOptions, options.Union());
+  ctx.gd._operators.push_back(op_offset); // append to the graph's operator list
+}
+
+void export_node(ExportContext &ctx, luci::CircleWhile *node)
+{ // Exports WHILE: inputs are input(i) in order; outputs are CircleWhileOut successors by index().
+  auto while_outs = loco::succs(node);
+  assert(while_outs.size() == node->output_count());
+
+  uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_WHILE, node->op_version());
+  std::vector<int32_t> inputs_vec;
+  std::vector<int32_t> outputs_vec;
+
+  for (uint32_t idx = 0; idx < node->input_count(); ++idx)
+    inputs_vec.push_back(get_tensor_index(node->input(idx)));
+
+  for (uint32_t idx = 0; idx < node->output_count(); ++idx)
+  {
+    // store in order of index
+    bool found = false;
+    for (auto out : while_outs)
+    {
+      auto while_out = loco::must_cast<luci::CircleWhileOut *>(out);
+      if (while_out->index() == static_cast<int32_t>(idx))
+      {
+        outputs_vec.push_back(get_tensor_index(while_out));
+        found = true;
+        break;
+      }
+    }
+    if (!found) // no successor carries this index
+    {
+      INTERNAL_EXN("Invalid CircleWhile output");
+    }
+  }
+
+  auto inputs = ctx.builder.CreateVector(inputs_vec);
+  auto outputs = ctx.builder.CreateVector(outputs_vec);
+  auto options = CreateWhileOptions(ctx.builder, node->cond_branch(), node->body_branch());
+  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
+                                  circle::BuiltinOptions_WhileOptions, options.Union());
+  ctx.gd._operators.push_back(op_offset); // append to the graph's operator list
+}
+
 class OperationExporter final : public luci::CircleNodeMutableVisitor<void>,
                                 public loco::CanonicalNodeMutableVisitor<void>
 {
 public:
-  OperationExporter(FlatBufferBuilder &fbb, SerializedModelData &m, SerializedGraphData &g)
-      : builder{fbb}, md{m}, gd{g}
+  OperationExporter(ExportContext &ctx) : _ctx{ctx}
   {
     // DO NOTHING
   }
@@ -103,10 +669,12 @@ public:
   void visit(luci::CircleMul *) final;
   void visit(luci::CircleNeg *) final;
   void visit(luci::CircleNonMaxSuppressionV4 *) final;
+  void visit(luci::CircleNonMaxSuppressionV5 *) final;
   void visit(luci::CircleNotEqual *) final;
   void visit(luci::CircleOneHot *) final;
   void visit(luci::CirclePack *) final;
   void visit(luci::CirclePad *) final;
+  void visit(luci::CirclePadV2 *) final;
   void visit(luci::CirclePow *) final;
   void visit(luci::CirclePRelu *) final;
   void visit(luci::CircleRange *) final;
@@ -168,6 +736,7 @@ public:
   void visit(luci::CircleCustomOut *) final {}
   void visit(luci::CircleIfOut *) final {}
   void visit(luci::CircleNonMaxSuppressionV4Out *) final {}
+  void visit(luci::CircleNonMaxSuppressionV5Out *) final {}
   void visit(luci::CircleSplitOut *) final {}
   void visit(luci::CircleSplitVOut *) final {}
   void visit(luci::CircleTopKV2Out *) final {}
@@ -177,14 +746,6 @@ public:
 
 private:
   /**
-   * @brief Exports CircleMaxPool2D or CircleAveragePool2D
-   *
-   * @note  CirclePool2D should be one of CircleMaxPool2D or CircleAveragePool2D
-   */
-  template <class CirclePool2D>
-  void export_pool_2d(CirclePool2D *node, circle::BuiltinOperator builtin_op);
-
-  /**
    * @brief export simple nodes
    */
   void export_simple(loco::Node *node, circle::BuiltinOperator bop, circle::BuiltinOptions bot,
@@ -196,179 +757,83 @@ private:
   void export_simple(loco::Node *node, circle::BuiltinOperator bop);
 
 private:
-  FlatBufferBuilder &builder;
-  SerializedModelData &md;
-  SerializedGraphData &gd;
+  ExportContext &_ctx;
 };
 
-template <class CirclePool2D>
-void OperationExporter::export_pool_2d(CirclePool2D *node, circle::BuiltinOperator builtin_op)
-{
-  LUCI_ASSERT(builtin_op == circle::BuiltinOperator_MAX_POOL_2D ||
-                  builtin_op == circle::BuiltinOperator_L2_POOL_2D ||
-                  builtin_op == circle::BuiltinOperator_AVERAGE_POOL_2D,
-              "Should be L2Pool, MaxPool or AvgPool");
-  LUCI_ASSERT(node->padding() != luci::Padding::UNDEFINED, "Padding is not set");
-
-  uint32_t op_idx = md.registerBuiltinOpcode(builtin_op, node->op_version());
-  std::vector<int32_t> inputs_vec{get_tensor_index(node->value())};
-  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
-  auto inputs = builder.CreateVector(inputs_vec);
-  auto outputs = builder.CreateVector(outputs_vec);
-
-  circle::Padding padding = getOpPadding(node->padding());
-
-  auto options = CreatePool2DOptions(builder, padding, node->stride()->w(), node->stride()->h(),
-                                     node->filter()->w(), node->filter()->h(),
-                                     to_circle_actfunc(node->fusedActivationFunction()));
-  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_Pool2DOptions, options.Union());
-  gd._operators.push_back(op_offset);
-}
-
 void OperationExporter::export_simple(loco::Node *node, circle::BuiltinOperator bop,
                                       circle::BuiltinOptions bot,
                                       flatbuffers::Offset<void> options_offset)
 {
-  uint32_t op_idx =
-      md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version());
-  std::vector<int32_t> inputs_vec;
-  std::vector<int32_t> outputs_vec{get_tensor_index(node)};
-  for (uint32_t i = 0; i < node->arity(); ++i)
-    inputs_vec.push_back(get_tensor_index(node->arg(i)));
-  auto inputs = builder.CreateVector(inputs_vec);
-  auto outputs = builder.CreateVector(outputs_vec);
-  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs, bot, options_offset);
-  gd._operators.push_back(op_offset);
+  export_node(_ctx, node, bop, bot, options_offset);
 }
 
 void OperationExporter::export_simple(loco::Node *node, circle::BuiltinOperator bop)
 {
-  uint32_t op_idx =
-      md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version());
-  std::vector<int32_t> inputs_vec;
-  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
-  for (uint32_t i = 0; i < node->arity(); ++i)
-    inputs_vec.push_back(get_tensor_index(node->arg(i)));
-  auto inputs = builder.CreateVector(inputs_vec);
-  auto outputs = builder.CreateVector(outputs_vec);
-  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs);
-  gd._operators.push_back(op_offset);
+  export_node(_ctx, node, bop);
 }
 
 void OperationExporter::visit(luci::CircleAbs *node)
 {
   export_simple(node, circle::BuiltinOperator_ABS, circle::BuiltinOptions_AbsOptions,
-                CreateAbsOptions(builder).Union());
+                CreateAbsOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleAdd *node)
 {
   export_simple(
       node, circle::BuiltinOperator_ADD, circle::BuiltinOptions_AddOptions,
-      CreateAddOptions(builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
+      CreateAddOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
 }
 
-void OperationExporter::visit(luci::CircleAddN *node)
-{
-  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_ADD_N, node->op_version());
-  std::vector<int32_t> inputs_vec;
-  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
-
-  for (uint32_t i = 0; i < node->arity(); ++i)
-    inputs_vec.push_back(get_tensor_index(node->inputs(i)));
-
-  auto inputs = builder.CreateVector(inputs_vec);
-  auto outputs = builder.CreateVector(outputs_vec);
-  auto options = CreateAddNOptions(builder);
-  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_AddNOptions, options.Union());
-  gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleAddN *node) { export_node(_ctx, node); }
 
 void OperationExporter::visit(luci::CircleArgMax *node)
 {
-  export_simple(node, circle::BuiltinOperator_ARG_MAX, circle::BuiltinOptions_ArgMaxOptions,
-                CreateArgMaxOptions(builder, to_circle_tensortype(node->output_type())).Union());
+  export_simple(
+      node, circle::BuiltinOperator_ARG_MAX, circle::BuiltinOptions_ArgMaxOptions,
+      CreateArgMaxOptions(_ctx.builder, to_circle_tensortype(node->output_type())).Union());
 }
 
 void OperationExporter::visit(luci::CircleArgMin *node)
 {
-  export_simple(node, circle::BuiltinOperator_ARG_MIN, circle::BuiltinOptions_ArgMinOptions,
-                CreateArgMinOptions(builder, to_circle_tensortype(node->output_type())).Union());
+  export_simple(
+      node, circle::BuiltinOperator_ARG_MIN, circle::BuiltinOptions_ArgMinOptions,
+      CreateArgMinOptions(_ctx.builder, to_circle_tensortype(node->output_type())).Union());
 }
 
 void OperationExporter::visit(luci::CircleAveragePool2D *node)
 {
-  export_pool_2d<luci::CircleAveragePool2D>(node, circle::BuiltinOperator_AVERAGE_POOL_2D);
+  export_pool_2d<luci::CircleAveragePool2D>(_ctx, node, circle::BuiltinOperator_AVERAGE_POOL_2D);
 }
 
 void OperationExporter::visit(luci::CircleBatchMatMul *node)
 {
   export_simple(node, circle::BuiltinOperator_BATCH_MATMUL,
                 circle::BuiltinOptions_BatchMatMulOptions,
-                CreateBatchMatMulOptions(builder, node->adj_x(), node->adj_y()).Union());
+                CreateBatchMatMulOptions(_ctx.builder, node->adj_x(), node->adj_y()).Union());
 }
 
-void OperationExporter::visit(luci::CircleCast *node)
-{
-  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_CAST, node->op_version());
-  std::vector<int32_t> inputs_vec{get_tensor_index(node->x())};
-  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
-  auto inputs = builder.CreateVector(inputs_vec);
-  auto outputs = builder.CreateVector(outputs_vec);
-
-  flatbuffers::Offset<Operator> op_offset;
-  if (node->out_data_type() != loco::DataType::Unknown)
-  {
-    auto options = CreateCastOptions(builder, to_circle_tensortype(node->in_data_type()),
-                                     to_circle_tensortype(node->out_data_type()));
-    op_offset = CreateOperator(builder, op_idx, inputs, outputs, circle::BuiltinOptions_CastOptions,
-                               options.Union());
-  }
-  else
-  {
-    op_offset = CreateOperator(builder, op_idx, inputs, outputs);
-  }
-  gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleCast *node) { export_node(_ctx, node); }
 
 void OperationExporter::visit(luci::CircleCeil *node)
 {
   export_simple(node, circle::BuiltinOperator_CEIL);
 }
 
-void OperationExporter::visit(luci::CircleConcatenation *node)
-{
-  uint32_t op_idx =
-      md.registerBuiltinOpcode(circle::BuiltinOperator_CONCATENATION, node->op_version());
-  std::vector<int32_t> inputs_vec;
-  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
-
-  for (uint32_t i = 0; i < node->numValues(); ++i)
-    inputs_vec.push_back(get_tensor_index(node->values(i)));
-
-  auto inputs = builder.CreateVector(inputs_vec);
-  auto outputs = builder.CreateVector(outputs_vec);
-  auto options = CreateConcatenationOptions(builder, node->axis(),
-                                            to_circle_actfunc(node->fusedActivationFunction()));
-  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_ConcatenationOptions, options.Union());
-  gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleConcatenation *node) { export_node(_ctx, node); }
 
 void OperationExporter::visit(luci::CircleBatchToSpaceND *node)
 {
   export_simple(node, circle::BuiltinOperator_BATCH_TO_SPACE_ND,
                 circle::BuiltinOptions_BatchToSpaceNDOptions,
-                CreateBatchToSpaceNDOptions(builder).Union());
+                CreateBatchToSpaceNDOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleConv2D *node)
 {
   export_simple(node, circle::BuiltinOperator_CONV_2D, circle::BuiltinOptions_Conv2DOptions,
-                CreateConv2DOptions(builder, getOpPadding(node->padding()), node->stride()->w(),
-                                    node->stride()->h(),
+                CreateConv2DOptions(_ctx.builder, getOpPadding(node->padding()),
+                                    node->stride()->w(), node->stride()->h(),
                                     to_circle_actfunc(node->fusedActivationFunction()),
                                     node->dilation()->w(), node->dilation()->h())
                     .Union());
@@ -377,64 +842,23 @@ void OperationExporter::visit(luci::CircleConv2D *node)
 void OperationExporter::visit(luci::CircleCos *node)
 {
   export_simple(node, circle::BuiltinOperator_COS, circle::BuiltinOptions_CosOptions,
-                CreateCosOptions(builder).Union());
+                CreateCosOptions(_ctx.builder).Union());
 }
 
-void OperationExporter::visit(luci::CircleCustom *node)
-{
-  auto custom_outputs = loco::succs(node);
-
-  uint32_t op_idx = md.registerCustomOpcode(node->custom_code());
-  std::vector<int32_t> inputs_vec;
-  std::vector<int32_t> outputs_vec;
-
-  for (uint32_t index = 0; index < node->numInputs(); index++)
-  {
-    inputs_vec.push_back(get_tensor_index(node->inputs(index)));
-  }
-  for (uint32_t index = 0; index < custom_outputs.size(); index++)
-  {
-    // store in order of index
-    bool found = false;
-    for (auto out : custom_outputs)
-    {
-      auto custom_out = loco::must_cast<luci::CircleCustomOut *>(out);
-      if (custom_out->index() == static_cast<int32_t>(index))
-      {
-        outputs_vec.push_back(get_tensor_index(custom_out));
-        found = true;
-        break;
-      }
-    }
-    if (!found)
-    {
-      INTERNAL_EXN("Invalid Custom output");
-    }
-  }
-
-  auto inputs = builder.CreateVector(inputs_vec);
-  auto outputs = builder.CreateVector(outputs_vec);
-  flatbuffers::Offset<flatbuffers::Vector<uint8_t>> circle_custom_options;
-  std::vector<uint8_t> custom_options_vec{node->custom_options().begin(),
-                                          node->custom_options().end()};
-  circle_custom_options = builder.CreateVector(custom_options_vec);
-  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs, circle::BuiltinOptions_NONE,
-                                  flatbuffers::Offset<void>(), circle_custom_options);
-  gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleCustom *node) { export_node(_ctx, node); }
 
 void OperationExporter::visit(luci::CircleDepthToSpace *node)
 {
   export_simple(node, circle::BuiltinOperator_DEPTH_TO_SPACE,
                 circle::BuiltinOptions_DepthToSpaceOptions,
-                CreateDepthToSpaceOptions(builder, node->block_size()).Union());
+                CreateDepthToSpaceOptions(_ctx.builder, node->block_size()).Union());
 }
 
 void OperationExporter::visit(luci::CircleDepthwiseConv2D *node)
 {
   export_simple(node, circle::BuiltinOperator_DEPTHWISE_CONV_2D,
                 circle::BuiltinOptions_DepthwiseConv2DOptions,
-                CreateDepthwiseConv2DOptions(builder, getOpPadding(node->padding()),
+                CreateDepthwiseConv2DOptions(_ctx.builder, getOpPadding(node->padding()),
                                              node->stride()->w(), node->stride()->h(),
                                              node->depthMultiplier(),
                                              to_circle_actfunc(node->fusedActivationFunction()),
@@ -446,7 +870,7 @@ void OperationExporter::visit(luci::CircleDiv *node)
 {
   export_simple(
       node, circle::BuiltinOperator_DIV, circle::BuiltinOptions_DivOptions,
-      CreateDivOptions(builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
+      CreateDivOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
 }
 
 void OperationExporter::visit(luci::CircleElu *node)
@@ -457,25 +881,25 @@ void OperationExporter::visit(luci::CircleElu *node)
 void OperationExporter::visit(luci::CircleEqual *node)
 {
   export_simple(node, circle::BuiltinOperator_EQUAL, circle::BuiltinOptions_EqualOptions,
-                CreateEqualOptions(builder).Union());
+                CreateEqualOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleExp *node)
 {
   export_simple(node, circle::BuiltinOperator_EXP, circle::BuiltinOptions_ExpOptions,
-                CreateExpOptions(builder).Union());
+                CreateExpOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleExpandDims *node)
 {
   export_simple(node, circle::BuiltinOperator_EXPAND_DIMS, circle::BuiltinOptions_ExpandDimsOptions,
-                CreateExpandDimsOptions(builder).Union());
+                CreateExpandDimsOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleFill *node)
 {
   export_simple(node, circle::BuiltinOperator_FILL, circle::BuiltinOptions_FillOptions,
-                CreateFillOptions(builder).Union());
+                CreateFillOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleFloor *node)
@@ -486,124 +910,86 @@ void OperationExporter::visit(luci::CircleFloor *node)
 void OperationExporter::visit(luci::CircleFloorDiv *node)
 {
   export_simple(node, circle::BuiltinOperator_FLOOR_DIV, circle::BuiltinOptions_FloorDivOptions,
-                CreateFloorDivOptions(builder).Union());
+                CreateFloorDivOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleFloorMod *node)
 {
   export_simple(node, circle::BuiltinOperator_FLOOR_MOD, circle::BuiltinOptions_FloorModOptions,
-                CreateFloorModOptions(builder).Union());
+                CreateFloorModOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleFullyConnected *node)
 {
   export_simple(
       node, circle::BuiltinOperator_FULLY_CONNECTED, circle::BuiltinOptions_FullyConnectedOptions,
-      CreateFullyConnectedOptions(builder, to_circle_actfunc(node->fusedActivationFunction()))
+      CreateFullyConnectedOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction()))
           .Union());
 }
 
 void OperationExporter::visit(luci::CircleGather *node)
 {
   export_simple(node, circle::BuiltinOperator_GATHER, circle::BuiltinOptions_GatherOptions,
-                CreateGatherOptions(builder, node->axis()).Union());
+                CreateGatherOptions(_ctx.builder, node->axis()).Union());
 }
 
 void OperationExporter::visit(luci::CircleGatherNd *node)
 {
   export_simple(node, circle::BuiltinOperator_GATHER_ND, circle::BuiltinOptions_GatherNdOptions,
-                CreateGatherNdOptions(builder).Union());
+                CreateGatherNdOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleGreater *node)
 {
   export_simple(node, circle::BuiltinOperator_GREATER, circle::BuiltinOptions_GreaterOptions,
-                CreateGreaterOptions(builder).Union());
+                CreateGreaterOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleGreaterEqual *node)
 {
   export_simple(node, circle::BuiltinOperator_GREATER_EQUAL,
                 circle::BuiltinOptions_GreaterEqualOptions,
-                CreateGreaterEqualOptions(builder).Union());
+                CreateGreaterEqualOptions(_ctx.builder).Union());
 }
 
-void OperationExporter::visit(luci::CircleIf *node)
-{
-  auto if_outs = loco::succs(node);
-  assert(if_outs.size() == node->output_count());
-
-  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_IF, node->op_version());
-  std::vector<int32_t> inputs_vec;
-  std::vector<int32_t> outputs_vec;
-
-  inputs_vec.push_back(get_tensor_index(node->cond()));
-  for (uint32_t idx = 0; idx < node->input_count(); ++idx)
-    inputs_vec.push_back(get_tensor_index(node->input(idx)));
-
-  for (uint32_t idx = 0; idx < node->output_count(); ++idx)
-  {
-    // store in order of index
-    bool found = false;
-    for (auto out : if_outs)
-    {
-      auto if_out = loco::must_cast<luci::CircleIfOut *>(out);
-      if (if_out->index() == static_cast<int32_t>(idx))
-      {
-        outputs_vec.push_back(get_tensor_index(if_out));
-        found = true;
-        break;
-      }
-    }
-    if (!found)
-    {
-      INTERNAL_EXN("Invalid CircleIf output");
-    }
-  }
-
-  auto inputs = builder.CreateVector(inputs_vec);
-  auto outputs = builder.CreateVector(outputs_vec);
-  auto options = CreateIfOptions(builder, node->then_branch(), node->else_branch());
-  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_IfOptions, options.Union());
-  gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleIf *node) { export_node(_ctx, node); }
 
 void OperationExporter::visit(luci::CircleL2Normalize *node)
 {
   export_simple(
       node, circle::BuiltinOperator_L2_NORMALIZATION, circle::BuiltinOptions_L2NormOptions,
-      CreateL2NormOptions(builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
+      CreateL2NormOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction()))
+          .Union());
 }
 
 void OperationExporter::visit(luci::CircleL2Pool2D *node)
 {
-  export_pool_2d<luci::CircleL2Pool2D>(node, circle::BuiltinOperator_L2_POOL_2D);
+  export_pool_2d<luci::CircleL2Pool2D>(_ctx, node, circle::BuiltinOperator_L2_POOL_2D);
 }
 
 void OperationExporter::visit(luci::CircleLeakyRelu *node)
 {
   export_simple(node, circle::BuiltinOperator_LEAKY_RELU, circle::BuiltinOptions_LeakyReluOptions,
-                CreateLeakyReluOptions(builder, node->alpha()).Union());
+                CreateLeakyReluOptions(_ctx.builder, node->alpha()).Union());
 }
 
 void OperationExporter::visit(luci::CircleLess *node)
 {
   export_simple(node, circle::BuiltinOperator_LESS, circle::BuiltinOptions_LessOptions,
-                CreateLessOptions(builder).Union());
+                CreateLessOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleLessEqual *node)
 {
   export_simple(node, circle::BuiltinOperator_LESS_EQUAL, circle::BuiltinOptions_LessEqualOptions,
-                CreateLessEqualOptions(builder).Union());
+                CreateLessEqualOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleLocalResponseNormalization *node)
 {
   export_simple(node, circle::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION,
                 circle::BuiltinOptions_LocalResponseNormalizationOptions,
-                CreateLocalResponseNormalizationOptions(builder, node->radius(), node->bias(),
+                CreateLocalResponseNormalizationOptions(_ctx.builder, node->radius(), node->bias(),
                                                         node->alpha(), node->beta())
                     .Union());
 }
@@ -616,19 +1002,19 @@ void OperationExporter::visit(luci::CircleLog *node)
 void OperationExporter::visit(luci::CircleLogicalAnd *node)
 {
   export_simple(node, circle::BuiltinOperator_LOGICAL_AND, circle::BuiltinOptions_LogicalAndOptions,
-                CreateLogicalAndOptions(builder).Union());
+                CreateLogicalAndOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleLogicalNot *node)
 {
   export_simple(node, circle::BuiltinOperator_LOGICAL_NOT, circle::BuiltinOptions_LogicalNotOptions,
-                CreateLogicalNotOptions(builder).Union());
+                CreateLogicalNotOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleLogicalOr *node)
 {
   export_simple(node, circle::BuiltinOperator_LOGICAL_OR, circle::BuiltinOptions_LogicalOrOptions,
-                CreateLogicalOrOptions(builder).Union());
+                CreateLogicalOrOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleLogistic *node)
@@ -639,135 +1025,103 @@ void OperationExporter::visit(luci::CircleLogistic *node)
 void OperationExporter::visit(luci::CircleLogSoftmax *node)
 {
   export_simple(node, circle::BuiltinOperator_LOG_SOFTMAX, circle::BuiltinOptions_LogSoftmaxOptions,
-                CreateLogSoftmaxOptions(builder).Union());
+                CreateLogSoftmaxOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleMatrixDiag *node)
 {
   export_simple(node, circle::BuiltinOperator_MATRIX_DIAG, circle::BuiltinOptions_MatrixDiagOptions,
-                CreateMatrixDiagOptions(builder).Union());
+                CreateMatrixDiagOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleMatrixSetDiag *node)
 {
   export_simple(node, circle::BuiltinOperator_MATRIX_SET_DIAG,
                 circle::BuiltinOptions_MatrixSetDiagOptions,
-                CreateMatrixSetDiagOptions(builder).Union());
+                CreateMatrixSetDiagOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleMaximum *node)
 {
   export_simple(node, circle::BuiltinOperator_MAXIMUM, circle::BuiltinOptions_MaximumMinimumOptions,
-                CreateMaximumMinimumOptions(builder).Union());
+                CreateMaximumMinimumOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleMaxPool2D *node)
 {
-  export_pool_2d<luci::CircleMaxPool2D>(node, circle::BuiltinOperator_MAX_POOL_2D);
+  export_pool_2d<luci::CircleMaxPool2D>(_ctx, node, circle::BuiltinOperator_MAX_POOL_2D);
 }
 
 void OperationExporter::visit(luci::CircleMean *node)
 {
   export_simple(node, circle::BuiltinOperator_MEAN, circle::BuiltinOptions_ReducerOptions,
-                CreateReducerOptions(builder, node->keep_dims()).Union());
+                CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
 }
 
 void OperationExporter::visit(luci::CircleMinimum *node)
 {
   export_simple(node, circle::BuiltinOperator_MINIMUM, circle::BuiltinOptions_MaximumMinimumOptions,
-                CreateMaximumMinimumOptions(builder).Union());
+                CreateMaximumMinimumOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleMirrorPad *node)
 {
-  export_simple(node, circle::BuiltinOperator_MIRROR_PAD, circle::BuiltinOptions_MirrorPadOptions,
-                CreateMirrorPadOptions(builder, to_circle_mirrorpadmode(node->mode())).Union());
+  export_simple(
+      node, circle::BuiltinOperator_MIRROR_PAD, circle::BuiltinOptions_MirrorPadOptions,
+      CreateMirrorPadOptions(_ctx.builder, to_circle_mirrorpadmode(node->mode())).Union());
 }
 
 void OperationExporter::visit(luci::CircleMul *node)
 {
   export_simple(
       node, circle::BuiltinOperator_MUL, circle::BuiltinOptions_MulOptions,
-      CreateMulOptions(builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
+      CreateMulOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
 }
 
 void OperationExporter::visit(luci::CircleNeg *node)
 {
   export_simple(node, circle::BuiltinOperator_NEG, circle::BuiltinOptions_NegOptions,
-                CreateNegOptions(builder).Union());
+                CreateNegOptions(_ctx.builder).Union());
 }
 
-void OperationExporter::visit(luci::CircleNonMaxSuppressionV4 *node)
-{
-  auto nms_outs = loco::succs(node);
-  assert(nms_outs.size() == 2);
+void OperationExporter::visit(luci::CircleNonMaxSuppressionV4 *node) { export_node(_ctx, node); }
 
-  uint32_t op_idx =
-      md.registerBuiltinOpcode(circle::BuiltinOperator_NON_MAX_SUPPRESSION_V4, node->op_version());
-  std::vector<int32_t> inputs_vec{
-      get_tensor_index(node->boxes()),           get_tensor_index(node->scores()),
-      get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()),
-      get_tensor_index(node->score_threshold()),
-  };
-  std::vector<int32_t> outputs_vec;
-
-  for (uint32_t idx = 0; idx < nms_outs.size(); ++idx)
-  {
-    // store in order of index
-    bool found = false;
-    for (auto out : nms_outs)
-    {
-      auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV4Out *>(out);
-      if (nms_out->index() == static_cast<int32_t>(idx))
-      {
-        outputs_vec.push_back(get_tensor_index(nms_out));
-        found = true;
-        break;
-      }
-    }
-    if (!found)
-    {
-      INTERNAL_EXN("Invalid NonMaxSuppressionV4 output");
-    }
-  }
-
-  auto inputs = builder.CreateVector(inputs_vec);
-  auto outputs = builder.CreateVector(outputs_vec);
-  auto options = CreateNonMaxSuppressionV4Options(builder);
-  auto op_offset =
-      CreateOperator(builder, op_idx, inputs, outputs,
-                     circle::BuiltinOptions_NonMaxSuppressionV4Options, options.Union());
-  gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleNonMaxSuppressionV5 *node) { export_node(_ctx, node); }
 
 void OperationExporter::visit(luci::CircleNotEqual *node)
 {
   export_simple(node, circle::BuiltinOperator_NOT_EQUAL, circle::BuiltinOptions_NotEqualOptions,
-                CreateNotEqualOptions(builder).Union());
+                CreateNotEqualOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleOneHot *node)
 {
   export_simple(node, circle::BuiltinOperator_ONE_HOT, circle::BuiltinOptions_OneHotOptions,
-                CreateOneHotOptions(builder, node->axis()).Union());
+                CreateOneHotOptions(_ctx.builder, node->axis()).Union());
 }
 
 void OperationExporter::visit(luci::CirclePack *node)
 {
   export_simple(node, circle::BuiltinOperator_PACK, circle::BuiltinOptions_PackOptions,
-                CreatePackOptions(builder, node->values_count(), node->axis()).Union());
+                CreatePackOptions(_ctx.builder, node->values_count(), node->axis()).Union());
 }
 
 void OperationExporter::visit(luci::CirclePad *node)
 {
   export_simple(node, circle::BuiltinOperator_PAD, circle::BuiltinOptions_PadOptions,
-                CreatePadOptions(builder).Union());
+                CreatePadOptions(_ctx.builder).Union());
+}
+
+void OperationExporter::visit(luci::CirclePadV2 *node)
+{
+  export_simple(node, circle::BuiltinOperator_PADV2, circle::BuiltinOptions_PadV2Options,
+                CreatePadV2Options(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CirclePow *node)
 {
   export_simple(node, circle::BuiltinOperator_POW, circle::BuiltinOptions_PowOptions,
-                CreatePowOptions(builder).Union());
+                CreatePowOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CirclePRelu *node)
@@ -778,37 +1132,37 @@ void OperationExporter::visit(luci::CirclePRelu *node)
 void OperationExporter::visit(luci::CircleRange *node)
 {
   export_simple(node, circle::BuiltinOperator_RANGE, circle::BuiltinOptions_RangeOptions,
-                CreateRangeOptions(builder).Union());
+                CreateRangeOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleRank *node)
 {
   export_simple(node, circle::BuiltinOperator_RANK, circle::BuiltinOptions_RankOptions,
-                CreateRankOptions(builder).Union());
+                CreateRankOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleReduceAny *node)
 {
   export_simple(node, circle::BuiltinOperator_REDUCE_ANY, circle::BuiltinOptions_ReducerOptions,
-                CreateReducerOptions(builder, node->keep_dims()).Union());
+                CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
 }
 
 void OperationExporter::visit(luci::CircleReduceMax *node)
 {
   export_simple(node, circle::BuiltinOperator_REDUCE_MAX, circle::BuiltinOptions_ReducerOptions,
-                CreateReducerOptions(builder, node->keep_dims()).Union());
+                CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
 }
 
 void OperationExporter::visit(luci::CircleReduceMin *node)
 {
   export_simple(node, circle::BuiltinOperator_REDUCE_MIN, circle::BuiltinOptions_ReducerOptions,
-                CreateReducerOptions(builder, node->keep_dims()).Union());
+                CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
 }
 
 void OperationExporter::visit(luci::CircleReduceProd *node)
 {
   export_simple(node, circle::BuiltinOperator_REDUCE_PROD, circle::BuiltinOptions_ReducerOptions,
-                CreateReducerOptions(builder, node->keep_dims()).Union());
+                CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
 }
 
 void OperationExporter::visit(luci::CircleRelu *node)
@@ -828,18 +1182,18 @@ void OperationExporter::visit(luci::CircleReluN1To1 *node)
 
 void OperationExporter::visit(luci::CircleReshape *node)
 {
-  auto new_shape = builder.CreateVector<int32_t>(
+  auto new_shape = _ctx.builder.CreateVector<int32_t>(
       node->newShape()->rank(), [node](size_t i) { return node->newShape()->dim(i); });
 
   export_simple(node, circle::BuiltinOperator_RESHAPE, circle::BuiltinOptions_ReshapeOptions,
-                CreateReshapeOptions(builder, new_shape).Union());
+                CreateReshapeOptions(_ctx.builder, new_shape).Union());
 }
 
 void OperationExporter::visit(luci::CircleResizeBilinear *node)
 {
   export_simple(
       node, circle::BuiltinOperator_RESIZE_BILINEAR, circle::BuiltinOptions_ResizeBilinearOptions,
-      CreateResizeBilinearOptions(builder, node->align_corners(), node->half_pixel_centers())
+      CreateResizeBilinearOptions(_ctx.builder, node->align_corners(), node->half_pixel_centers())
           .Union());
 }
 
@@ -847,29 +1201,17 @@ void OperationExporter::visit(luci::CircleResizeNearestNeighbor *node)
 {
   export_simple(node, circle::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
                 circle::BuiltinOptions_ResizeNearestNeighborOptions,
-                CreateResizeNearestNeighborOptions(builder, node->align_corners()).Union());
+                CreateResizeNearestNeighborOptions(_ctx.builder, node->align_corners()).Union());
 }
 
 void OperationExporter::visit(luci::CircleReverseSequence *node)
 {
   export_simple(
       node, circle::BuiltinOperator_REVERSE_SEQUENCE, circle::BuiltinOptions_ReverseSequenceOptions,
-      CreateReverseSequenceOptions(builder, node->seq_axis(), node->batch_axis()).Union());
+      CreateReverseSequenceOptions(_ctx.builder, node->seq_axis(), node->batch_axis()).Union());
 }
 
-void OperationExporter::visit(luci::CircleReverseV2 *node)
-{
-  uint32_t op_idx =
-      md.registerBuiltinOpcode(circle::BuiltinOperator_REVERSE_V2, node->op_version());
-  std::vector<int32_t> inputs_vec{get_tensor_index(node->tensor()), get_tensor_index(node->axis())};
-  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
-  auto inputs = builder.CreateVector(inputs_vec);
-  auto outputs = builder.CreateVector(outputs_vec);
-  auto options = CreateReverseV2Options(builder);
-  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_ReverseSequenceOptions, options.Union());
-  gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleReverseV2 *node) { export_node(_ctx, node); }
 
 void OperationExporter::visit(luci::CircleRound *node)
 {
@@ -884,31 +1226,31 @@ void OperationExporter::visit(luci::CircleRsqrt *node)
 void OperationExporter::visit(luci::CircleScatterNd *node)
 {
   export_simple(node, circle::BuiltinOperator_SCATTER_ND, circle::BuiltinOptions_ScatterNdOptions,
-                CreateScatterNdOptions(builder).Union());
+                CreateScatterNdOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleSegmentSum *node)
 {
   export_simple(node, circle::BuiltinOperator_SEGMENT_SUM, circle::BuiltinOptions_SegmentSumOptions,
-                CreateSegmentSumOptions(builder).Union());
+                CreateSegmentSumOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleSelect *node)
 {
   export_simple(node, circle::BuiltinOperator_SELECT, circle::BuiltinOptions_SelectOptions,
-                CreateSelectOptions(builder).Union());
+                CreateSelectOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleSelectV2 *node)
 {
   export_simple(node, circle::BuiltinOperator_SELECT_V2, circle::BuiltinOptions_SelectV2Options,
-                CreateSelectV2Options(builder).Union());
+                CreateSelectV2Options(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleShape *node)
 {
   export_simple(node, circle::BuiltinOperator_SHAPE, circle::BuiltinOptions_ShapeOptions,
-                CreateShapeOptions(builder, to_circle_tensortype(node->out_type())).Union());
+                CreateShapeOptions(_ctx.builder, to_circle_tensortype(node->out_type())).Union());
 }
 
 void OperationExporter::visit(luci::CircleSin *node)
@@ -919,113 +1261,39 @@ void OperationExporter::visit(luci::CircleSin *node)
 void OperationExporter::visit(luci::CircleSlice *node)
 {
   export_simple(node, circle::BuiltinOperator_SLICE, circle::BuiltinOptions_SliceOptions,
-                CreateSliceOptions(builder).Union());
+                CreateSliceOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleSoftmax *node)
 {
   export_simple(node, circle::BuiltinOperator_SOFTMAX, circle::BuiltinOptions_SoftmaxOptions,
-                CreateSoftmaxOptions(builder, node->beta()).Union());
+                CreateSoftmaxOptions(_ctx.builder, node->beta()).Union());
 }
 
 void OperationExporter::visit(luci::CircleSpaceToBatchND *node)
 {
   export_simple(node, circle::BuiltinOperator_SPACE_TO_BATCH_ND,
                 circle::BuiltinOptions_SpaceToBatchNDOptions,
-                CreateSpaceToBatchNDOptions(builder).Union());
+                CreateSpaceToBatchNDOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleSpaceToDepth *node)
 {
   export_simple(node, circle::BuiltinOperator_SPACE_TO_DEPTH,
                 circle::BuiltinOptions_SpaceToDepthOptions,
-                CreateSpaceToDepthOptions(builder, node->block_size()).Union());
+                CreateSpaceToDepthOptions(_ctx.builder, node->block_size()).Union());
 }
 
 void OperationExporter::visit(luci::CircleSparseToDense *node)
 {
   export_simple(node, circle::BuiltinOperator_SPARSE_TO_DENSE,
                 circle::BuiltinOptions_SparseToDenseOptions,
-                CreateSparseToDenseOptions(builder, node->validate_indices()).Union());
+                CreateSparseToDenseOptions(_ctx.builder, node->validate_indices()).Union());
 }
 
-void OperationExporter::visit(luci::CircleSplit *node)
-{
-  auto split_outs = loco::succs(node);
-  assert(int32_t(split_outs.size()) == node->num_split());
-
-  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_SPLIT, node->op_version());
-  // NOTE BuiltinOperator_SPLIT input is placed at second position
-  std::vector<int32_t> inputs_vec{get_tensor_index(node->split_dim()),
-                                  get_tensor_index(node->input())};
-  std::vector<int32_t> outputs_vec;
-
-  for (int32_t index = 0; index < node->num_split(); index++)
-  {
-    // store in order of index
-    bool found = false;
-    for (auto out : split_outs)
-    {
-      auto split_out = loco::must_cast<luci::CircleSplitOut *>(out);
-      if (split_out->index() == index)
-      {
-        outputs_vec.push_back(get_tensor_index(split_out));
-        found = true;
-        break;
-      }
-    }
-    if (!found)
-    {
-      INTERNAL_EXN("Invalid Split output");
-    }
-  }
-
-  auto inputs = builder.CreateVector(inputs_vec);
-  auto outputs = builder.CreateVector(outputs_vec);
-  auto options = CreateSplitOptions(builder, node->num_split());
-  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_SplitOptions, options.Union());
-  gd._operators.push_back(op_offset);
-}
-
-void OperationExporter::visit(luci::CircleSplitV *node)
-{
-  auto split_outs = loco::succs(node);
-  assert(int32_t(split_outs.size()) == node->num_split());
-
-  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_SPLIT_V, node->op_version());
-  std::vector<int32_t> inputs_vec{get_tensor_index(node->input()),
-                                  get_tensor_index(node->size_splits()),
-                                  get_tensor_index(node->split_dim())};
-  std::vector<int32_t> outputs_vec;
-
-  for (int32_t index = 0; index < node->num_split(); index++)
-  {
-    // store in order of index
-    bool found = false;
-    for (auto out : split_outs)
-    {
-      auto split_out = loco::must_cast<luci::CircleSplitVOut *>(out);
-      if (split_out->index() == index)
-      {
-        outputs_vec.push_back(get_tensor_index(split_out));
-        found = true;
-        break;
-      }
-    }
-    if (!found)
-    {
-      INTERNAL_EXN("Invalid SplitV output");
-    }
-  }
+void OperationExporter::visit(luci::CircleSplit *node) { export_node(_ctx, node); }
 
-  auto inputs = builder.CreateVector(inputs_vec);
-  auto outputs = builder.CreateVector(outputs_vec);
-  auto options = CreateSplitVOptions(builder, node->num_split());
-  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_SplitVOptions, options.Union());
-  gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleSplitV *node) { export_node(_ctx, node); }
 
 void OperationExporter::visit(luci::CircleSqrt *node)
 {
@@ -1035,28 +1303,28 @@ void OperationExporter::visit(luci::CircleSqrt *node)
 void OperationExporter::visit(luci::CircleSquare *node)
 {
   export_simple(node, circle::BuiltinOperator_SQUARE, circle::BuiltinOptions_SquareOptions,
-                CreateSquareOptions(builder).Union());
+                CreateSquareOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleSquaredDifference *node)
 {
   export_simple(node, circle::BuiltinOperator_SQUARED_DIFFERENCE,
                 circle::BuiltinOptions_SquaredDifferenceOptions,
-                CreateSquaredDifferenceOptions(builder).Union());
+                CreateSquaredDifferenceOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleSqueeze *node)
 {
-  auto squeeze_dims = builder.CreateVector<int32_t>(node->squeeze_dims());
+  auto squeeze_dims = _ctx.builder.CreateVector<int32_t>(node->squeeze_dims());
   export_simple(node, circle::BuiltinOperator_SQUEEZE, circle::BuiltinOptions_SqueezeOptions,
-                CreateSqueezeOptions(builder, squeeze_dims).Union());
+                CreateSqueezeOptions(_ctx.builder, squeeze_dims).Union());
 }
 
 void OperationExporter::visit(luci::CircleStridedSlice *node)
 {
   export_simple(node, circle::BuiltinOperator_STRIDED_SLICE,
                 circle::BuiltinOptions_StridedSliceOptions,
-                CreateStridedSliceOptions(builder, node->begin_mask(), node->end_mask(),
+                CreateStridedSliceOptions(_ctx.builder, node->begin_mask(), node->end_mask(),
                                           node->ellipsis_mask(), node->new_axis_mask(),
                                           node->shrink_axis_mask())
                     .Union());
@@ -1066,13 +1334,13 @@ void OperationExporter::visit(luci::CircleSub *node)
 {
   export_simple(
       node, circle::BuiltinOperator_SUB, circle::BuiltinOptions_SubOptions,
-      CreateSubOptions(builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
+      CreateSubOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
 }
 
 void OperationExporter::visit(luci::CircleSum *node)
 {
   export_simple(node, circle::BuiltinOperator_SUM, circle::BuiltinOptions_ReducerOptions,
-                CreateReducerOptions(builder, node->keep_dims()).Union());
+                CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
 }
 
 void OperationExporter::visit(luci::CircleTanh *node)
@@ -1083,226 +1351,65 @@ void OperationExporter::visit(luci::CircleTanh *node)
 void OperationExporter::visit(luci::CircleTile *node)
 {
   export_simple(node, circle::BuiltinOperator_TILE, circle::BuiltinOptions_TileOptions,
-                CreateTileOptions(builder).Union());
+                CreateTileOptions(_ctx.builder).Union());
 }
 
-void OperationExporter::visit(luci::CircleTopKV2 *node)
-{
-  auto topkv2_outs = loco::succs(node);
-  int outs_count = int32_t(topkv2_outs.size());
-  assert(outs_count == 2);
-
-  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_TOPK_V2, node->op_version());
-  std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), get_tensor_index(node->k())};
-  std::vector<int32_t> outputs_vec;
-
-  for (int32_t index = 0; index < outs_count; index++)
-  {
-    // store in order of index
-    bool found = false;
-    for (auto out : topkv2_outs)
-    {
-      auto topkv2_out = loco::must_cast<luci::CircleTopKV2Out *>(out);
-      if (topkv2_out->index() == index)
-      {
-        outputs_vec.push_back(get_tensor_index(topkv2_out));
-        found = true;
-        break;
-      }
-    }
-    if (!found)
-    {
-      INTERNAL_EXN("Invalid TopKV2 output");
-    }
-  }
-
-  auto inputs = builder.CreateVector(inputs_vec);
-  auto outputs = builder.CreateVector(outputs_vec);
-  auto options = CreateTopKV2Options(builder);
-  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_TopKV2Options, options.Union());
-  gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleTopKV2 *node) { export_node(_ctx, node); }
 
 void OperationExporter::visit(luci::CircleTranspose *node)
 {
   export_simple(node, circle::BuiltinOperator_TRANSPOSE, circle::BuiltinOptions_TransposeOptions,
-                CreateTransposeOptions(builder).Union());
+                CreateTransposeOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleTransposeConv *node)
 {
   export_simple(node, circle::BuiltinOperator_TRANSPOSE_CONV,
                 circle::BuiltinOptions_TransposeConvOptions,
-                CreateTransposeConvOptions(builder, getOpPadding(node->padding()),
+                CreateTransposeConvOptions(_ctx.builder, getOpPadding(node->padding()),
                                            node->stride()->w(), node->stride()->h())
                     .Union());
 }
 
-void OperationExporter::visit(luci::CircleUnique *node)
-{
-  auto unique_outs = loco::succs(node);
-  assert(int32_t(unique_outs.size()) == 2);
-  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_UNIQUE, node->op_version());
+void OperationExporter::visit(luci::CircleUnique *node) { export_node(_ctx, node); }
 
-  std::vector<int32_t> inputs_vec{get_tensor_index(node->input())};
-  std::vector<int32_t> outputs_vec;
-
-  for (int32_t index = 0; index < 2; index++)
-  {
-    // store in order of index
-    bool found = false;
-    for (auto out : unique_outs)
-    {
-      auto unique_out = loco::must_cast<luci::CircleUniqueOut *>(out);
-      if (unique_out->index() == index)
-      {
-        outputs_vec.push_back(get_tensor_index(unique_out));
-        found = true;
-        break;
-      }
-    }
-    if (!found)
-    {
-      INTERNAL_EXN("Invalid Unique output");
-    }
-  }
-
-  auto inputs = builder.CreateVector(inputs_vec);
-  auto outputs = builder.CreateVector(outputs_vec);
-  auto options = CreateUniqueOptions(builder, to_circle_tensortype(node->idx_out_type()));
-  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_UniqueOptions, options.Union());
-  gd._operators.push_back(op_offset);
-}
-
-void OperationExporter::visit(luci::CircleUnpack *node)
-{
-  LOGGER(l);
-  auto settings = luci::UserSettings::settings();
-
-  auto unpack_outs = loco::succs(node);
-  // NOTE real models may not use all of the outputs
-  if (static_cast<int32_t>(unpack_outs.size()) != node->num())
-  {
-    if (settings->get(luci::UserSettings::Key::DisableValidation))
-    {
-      WARN(l) << "Warning: export Unpack(" << node->name() << ") 'num' not same as outputs";
-    }
-    else
-      assert(false);
-  }
-
-  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_UNPACK, node->op_version());
-  std::vector<int32_t> inputs_vec{get_tensor_index(node->value())};
-  std::vector<int32_t> outputs_vec;
-
-  for (int32_t index = 0; index < node->num(); index++)
-  {
-    // store in order of index
-    bool found = false;
-    for (auto out : unpack_outs)
-    {
-      auto unpack_out = loco::must_cast<luci::CircleUnpackOut *>(out);
-      if (unpack_out->index() == index)
-      {
-        outputs_vec.push_back(get_tensor_index(unpack_out));
-        found = true;
-        break;
-      }
-    }
-    // NOTE real models may not use all of the outputs
-    if (!found)
-    {
-      if (settings->get(luci::UserSettings::Key::DisableValidation))
-      {
-        WARN(l) << "Warning: export Unpack(" << node->name() << ") output " << index << " not used";
-      }
-      else
-        assert(false);
-    }
-  }
-
-  auto inputs = builder.CreateVector(inputs_vec);
-  auto outputs = builder.CreateVector(outputs_vec);
-  auto options = CreateUnpackOptions(builder, node->num(), node->axis());
-  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_UnpackOptions, options.Union());
-  gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleUnpack *node) { export_node(_ctx, node); }
 
 void OperationExporter::visit(luci::CircleWhere *node)
 {
   export_simple(node, circle::BuiltinOperator_WHERE, circle::BuiltinOptions_WhereOptions,
-                CreateWhereOptions(builder).Union());
+                CreateWhereOptions(_ctx.builder).Union());
 }
 
-void OperationExporter::visit(luci::CircleWhile *node)
-{
-  auto while_outs = loco::succs(node);
-  assert(while_outs.size() == node->output_count());
-
-  uint32_t op_idx = md.registerBuiltinOpcode(circle::BuiltinOperator_WHILE, node->op_version());
-  std::vector<int32_t> inputs_vec;
-  std::vector<int32_t> outputs_vec;
-
-  for (uint32_t idx = 0; idx < node->input_count(); ++idx)
-    inputs_vec.push_back(get_tensor_index(node->input(idx)));
-
-  for (uint32_t idx = 0; idx < node->output_count(); ++idx)
-  {
-    // store in order of index
-    bool found = false;
-    for (auto out : while_outs)
-    {
-      auto while_out = loco::must_cast<luci::CircleWhileOut *>(out);
-      if (while_out->index() == static_cast<int32_t>(idx))
-      {
-        outputs_vec.push_back(get_tensor_index(while_out));
-        found = true;
-        break;
-      }
-    }
-    if (!found)
-    {
-      INTERNAL_EXN("Invalid CircleWhile output");
-    }
-  }
-
-  auto inputs = builder.CreateVector(inputs_vec);
-  auto outputs = builder.CreateVector(outputs_vec);
-  auto options = CreateWhileOptions(builder, node->cond_branch(), node->body_branch());
-  auto op_offset = CreateOperator(builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_WhileOptions, options.Union());
-  gd._operators.push_back(op_offset);
-}
+void OperationExporter::visit(luci::CircleWhile *node) { export_node(_ctx, node); }
 
 void OperationExporter::visit(luci::CircleZerosLike *node)
 {
   export_simple(node, circle::BuiltinOperator_ZEROS_LIKE, circle::BuiltinOptions_ZerosLikeOptions,
-                CreateZerosLikeOptions(builder).Union());
+                CreateZerosLikeOptions(_ctx.builder).Union());
 }
 
 void OperationExporter::visit(luci::CircleBCQFullyConnected *node)
 {
   export_simple(node, circle::BuiltinOperator_BCQ_FULLY_CONNECTED,
                 circle::BuiltinOptions_BCQFullyConnectedOptions,
-                CreateBCQFullyConnectedOptions(builder, node->weights_hidden_size(),
+                CreateBCQFullyConnectedOptions(_ctx.builder, node->weights_hidden_size(),
                                                to_circle_actfunc(node->fusedActivationFunction()))
                     .Union());
 }
 
 void OperationExporter::visit(luci::CircleBCQGather *node)
 {
-  export_simple(node, circle::BuiltinOperator_BCQ_GATHER, circle::BuiltinOptions_BCQGatherOptions,
-                CreateBCQGatherOptions(builder, node->input_hidden_size(), node->axis()).Union());
+  export_simple(
+      node, circle::BuiltinOperator_BCQ_GATHER, circle::BuiltinOptions_BCQGatherOptions,
+      CreateBCQGatherOptions(_ctx.builder, node->input_hidden_size(), node->axis()).Union());
 }
 
 void OperationExporter::visit(luci::CircleInstanceNorm *node)
 {
   export_simple(node, circle::BuiltinOperator_INSTANCE_NORM,
                 circle::BuiltinOptions_InstanceNormOptions,
-                CreateInstanceNormOptions(builder, node->epsilon(),
+                CreateInstanceNormOptions(_ctx.builder, node->epsilon(),
                                           to_circle_actfunc(node->fusedActivationFunction()))
                     .Union());
 }
@@ -1312,7 +1419,8 @@ void exportNode(loco::Node *node, flatbuffers::FlatBufferBuilder &builder, Seria
 {
   if (auto circle_node = dynamic_cast<luci::CircleNode *>(node))
   {
-    OperationExporter exporter{builder, md, gd};
+    ExportContext ctx{builder, md, gd};
+    OperationExporter exporter{ctx};
     circle_node->accept(&exporter);
   }
   else
diff --git a/compiler/luci/import/include/luci/Import/Nodes.h b/compiler/luci/import/include/luci/Import/Nodes.h
index 825c2147d..0b21d380f 100644
--- a/compiler/luci/import/include/luci/Import/Nodes.h
+++ b/compiler/luci/import/include/luci/Import/Nodes.h
@@ -74,10 +74,12 @@
 #include "Nodes/CircleMul.h"
 #include "Nodes/CircleNeg.h"
 #include "Nodes/CircleNonMaxSuppressionV4.h"
+#include "Nodes/CircleNonMaxSuppressionV5.h"
 #include "Nodes/CircleNotEqual.h"
 #include "Nodes/CircleOneHot.h"
 #include "Nodes/CirclePack.h"
 #include "Nodes/CirclePad.h"
+#include "Nodes/CirclePadV2.h"
 #include "Nodes/CirclePow.h"
 #include "Nodes/CirclePRelu.h"
 #include "Nodes/CircleRange.h"
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleNonMaxSuppressionV5.h b/compiler/luci/import/include/luci/Import/Nodes/CircleNonMaxSuppressionV5.h
new file mode 100644
index 000000000..62be0758e
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CircleNonMaxSuppressionV5.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V5_H__
+#define __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V5_H__
+
+#include "luci/Import/GraphBuilderBase.h"
+
+namespace luci
+{
+
+class CircleNonMaxSuppressionV5GraphBuilder : public GraphBuilderBase
+{
+public:
+  bool validate(const ValidateArgs &args) const final;
+  // Creates the NonMaxSuppressionV5 node plus one virtual Out node per output tensor.
+  void build(const circle::OperatorT &op, GraphBuilderContext *context) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_NON_MAX_SUPPRESSION_V5_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CirclePadV2.h b/compiler/luci/import/include/luci/Import/Nodes/CirclePadV2.h
new file mode 100644
index 000000000..089f52c81
--- /dev/null
+++ b/compiler/luci/import/include/luci/Import/Nodes/CirclePadV2.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_PADV2_H__
+#define __LUCI_IMPORT_OP_CIRCLE_PADV2_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CirclePadV2GraphBuilder : public GraphBuilder
+{
+public:
+  bool validate(const ValidateArgs &args) const final; // true only for 3 inputs and 1 output
+
+private:
+  CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+                         loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_PADV2_H__
diff --git a/compiler/luci/import/src/GraphBuilderRegistry.cpp b/compiler/luci/import/src/GraphBuilderRegistry.cpp
index cc328cc16..c6bcacb54 100644
--- a/compiler/luci/import/src/GraphBuilderRegistry.cpp
+++ b/compiler/luci/import/src/GraphBuilderRegistry.cpp
@@ -83,10 +83,12 @@ GraphBuilderRegistry::GraphBuilderRegistry()
   CIRCLE_NODE(MUL, CircleMulGraphBuilder);                                                 // 18
   CIRCLE_NODE(NEG, CircleNegGraphBuilder);                                                 // 59
   CIRCLE_NODE(NON_MAX_SUPPRESSION_V4, CircleNonMaxSuppressionV4GraphBuilder);              // 120,
+  CIRCLE_NODE(NON_MAX_SUPPRESSION_V5, CircleNonMaxSuppressionV5GraphBuilder);              // 121,
   CIRCLE_NODE(NOT_EQUAL, CircleNotEqualGraphBuilder);                                      // 72
   CIRCLE_NODE(ONE_HOT, CircleOneHotGraphBuilder);                                          // 85
   CIRCLE_NODE(PACK, CirclePackGraphBuilder);                                               // 83
   CIRCLE_NODE(PAD, CirclePadGraphBuilder);                                                 // 34
+  CIRCLE_NODE(PADV2, CirclePadV2GraphBuilder);                                             // 60
   CIRCLE_NODE(POW, CirclePowGraphBuilder);                                                 // 78
   CIRCLE_NODE(PRELU, CirclePReluGraphBuilder);                                             // 54,
   CIRCLE_NODE(RANGE, CircleRangeGraphBuilder);                                             // 96
@@ -155,11 +157,9 @@ GraphBuilderRegistry::GraphBuilderRegistry()
   // BuiltinOperator_DELEGATE = 51,
   // BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM = 52,
   // BuiltinOperator_ARG_MAX = 56,
-  // BuiltinOperator_PADV2 = 60,
   // BuiltinOperator_FAKE_QUANT = 80,
   // BuiltinOperator_QUANTIZE = 114,
   // BuiltinOperator_HARD_SWISH = 117,
-  // BuiltinOperator_NON_MAX_SUPPRESSION_V5 = 121,
   // BuiltinOperator_DENSIFY = 124,
 }
 
diff --git a/compiler/luci/import/src/Nodes/CircleBatchToSpaceND.cpp b/compiler/luci/import/src/Nodes/CircleBatchToSpaceND.cpp
index 8c2039fff..7faab141c 100644
--- a/compiler/luci/import/src/Nodes/CircleBatchToSpaceND.cpp
+++ b/compiler/luci/import/src/Nodes/CircleBatchToSpaceND.cpp
@@ -18,49 +18,16 @@
 
 #include <luci/IR/Nodes/CircleBatchToSpaceND.h>
 
-#include <loco.h>
+#include "ValidateHelpers.h"
 
-#include <cassert>
+#include <loco.h>
 
 namespace luci
 {
 
 bool CircleBatchToSpaceNDGraphBuilder::validate(const ValidateArgs &args) const
 {
-  const auto &inputs = args.op.inputs;
-  if (inputs.size() != 3)
-    return false;
-
-  // input 1 and 2 should have INT32/INT64 type
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor_1 = tensors.at(inputs.at(1));
-  switch (tensor_1->type)
-  {
-    case circle::TensorType_INT32:
-    case circle::TensorType_INT64:
-      break;
-    default:
-      return false;
-  }
-  const auto &tensor_2 = tensors.at(inputs.at(2));
-  switch (tensor_2->type)
-  {
-    case circle::TensorType_INT32:
-    case circle::TensorType_INT64:
-      break;
-    default:
-      return false;
-  }
-
-  // Only support input shape dimension 3 and 4 only
-  const auto &tensor_0 = tensors.at(inputs.at(0));
-  const auto t_0_s = tensor_0->shape.size();
-  if (t_0_s != 3 && t_0_s != 4)
-    return false;
-
-  // TODO check input shape
-
-  return true;
+  return validate_batch_space_nd(args);
 }
 
 CircleNode *CircleBatchToSpaceNDGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleConst.cpp b/compiler/luci/import/src/Nodes/CircleConst.cpp
index 7131dc115..fad7a0757 100644
--- a/compiler/luci/import/src/Nodes/CircleConst.cpp
+++ b/compiler/luci/import/src/Nodes/CircleConst.cpp
@@ -118,6 +118,10 @@ CircleConst *create_circleconst(GraphBuilderContext *context, int32_t tensor_ind
         copy_data<loco::DataType::U8>(buffer, num_elements, const_node);
         break;
 
+      case loco::DataType::S8:
+        copy_data<loco::DataType::S8>(buffer, num_elements, const_node);
+        break;
+
       case loco::DataType::S16:
         copy_data<loco::DataType::S16>(buffer, num_elements, const_node);
         break;
diff --git a/compiler/luci/import/src/Nodes/CircleMaximum.cpp b/compiler/luci/import/src/Nodes/CircleMaximum.cpp
index 4d1468f19..805d5bc89 100644
--- a/compiler/luci/import/src/Nodes/CircleMaximum.cpp
+++ b/compiler/luci/import/src/Nodes/CircleMaximum.cpp
@@ -18,6 +18,8 @@
 
 #include <luci/IR/Nodes/CircleMaximum.h>
 
+#include "ValidateHelpers.h"
+
 #include <loco.h>
 
 namespace luci
@@ -25,37 +27,7 @@ namespace luci
 
 bool CircleMaximumGraphBuilder::validate(const ValidateArgs &args) const
 {
-  const auto &inputs = args.op.inputs;
-  const auto &outputs = args.op.outputs;
-
-  if (inputs.size() != 2)
-    return false;
-
-  if (outputs.size() != 1)
-    return false;
-
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor = tensors.at(inputs.at(0));
-
-  switch (tensor->type)
-  {
-    case circle::TensorType_FLOAT16:
-    case circle::TensorType_FLOAT32:
-    case circle::TensorType_FLOAT64:
-    case circle::TensorType_INT32:
-    case circle::TensorType_INT64:
-      break;
-    default:
-      return false;
-  }
-
-  if (tensors[inputs.at(1)]->type != tensor->type)
-    return false;
-
-  if (tensors[outputs[0]]->type != tensor->type)
-    return false;
-
-  return true;
+  return validate_minmax(args);
 }
 
 CircleNode *CircleMaximumGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleMinimum.cpp b/compiler/luci/import/src/Nodes/CircleMinimum.cpp
index 8b4daf197..381039e88 100644
--- a/compiler/luci/import/src/Nodes/CircleMinimum.cpp
+++ b/compiler/luci/import/src/Nodes/CircleMinimum.cpp
@@ -18,6 +18,8 @@
 
 #include <luci/IR/Nodes/CircleMinimum.h>
 
+#include "ValidateHelpers.h"
+
 #include <loco.h>
 
 namespace luci
@@ -25,37 +27,7 @@ namespace luci
 
 bool CircleMinimumGraphBuilder::validate(const ValidateArgs &args) const
 {
-  const auto &inputs = args.op.inputs;
-  const auto &outputs = args.op.outputs;
-
-  if (inputs.size() != 2)
-    return false;
-
-  if (outputs.size() != 1)
-    return false;
-
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor = tensors.at(inputs.at(0));
-
-  switch (tensor->type)
-  {
-    case circle::TensorType_FLOAT16:
-    case circle::TensorType_FLOAT32:
-    case circle::TensorType_FLOAT64:
-    case circle::TensorType_INT32:
-    case circle::TensorType_INT64:
-      break;
-    default:
-      return false;
-  }
-
-  if (tensors[inputs.at(1)]->type != tensor->type)
-    return false;
-
-  if (tensors[outputs[0]]->type != tensor->type)
-    return false;
-
-  return true;
+  return validate_minmax(args);
 }
 
 CircleNode *CircleMinimumGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV5.cpp b/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV5.cpp
new file mode 100644
index 000000000..241dbf5ff
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV5.cpp
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleNonMaxSuppressionV5.h"
+
+#include <luci/IR/Nodes/CircleNonMaxSuppressionV5.h>
+#include <luci/IR/Nodes/CircleNonMaxSuppressionV5Out.h>
+
+#include <loco.h>
+#include <oops/UserExn.h>
+
+namespace luci
+{
+
+bool CircleNonMaxSuppressionV5GraphBuilder::validate(const ValidateArgs &args) const
+{
+  // Accept only 6 inputs / 3 outputs, boxes shaped [N, 4] with N matching the
+  // first dimension of scores, and INT32/FLOAT32/FLOAT32/FLOAT32 for inputs 2..5.
+  const auto &inputs = args.op.inputs;
+  const auto &outputs = args.op.outputs;
+  if (inputs.size() != 6 || outputs.size() != 3)
+    return false;
+
+  const auto &tensors = args.reader.tensors();
+  const auto &boxes_shape = tensors.at(inputs[0])->shape;
+  if (boxes_shape.size() != 2 || boxes_shape.at(1) != 4)
+    return false;
+
+  // A scores tensor without dimensions would make shape.at(0) throw
+  // std::out_of_range; reject it like any other malformed operand instead.
+  const auto &scores_shape = tensors.at(inputs[1])->shape;
+  if (scores_shape.empty() || boxes_shape.at(0) != scores_shape.at(0))
+    return false;
+
+  if (tensors.at(inputs[2])->type != circle::TensorType_INT32)
+    return false;
+  if (tensors.at(inputs[3])->type != circle::TensorType_FLOAT32)
+    return false;
+  if (tensors.at(inputs[4])->type != circle::TensorType_FLOAT32)
+    return false;
+  if (tensors.at(inputs[5])->type != circle::TensorType_FLOAT32)
+    return false;
+  return true;
+}
+
+/**
+ * @brief  NonMaxSuppressionV5 Node builder
+ *
+ * @note   Current loco does not provide multiple outputs
+ *         We will create multiple NonMaxSuppressionV5Out nodes to emulate this
+ */
+
+void CircleNonMaxSuppressionV5GraphBuilder::build(const circle::OperatorT &op,
+                                                  GraphBuilderContext *context) const
+{
+  assert(context != nullptr);
+
+  auto graph = context->graph();
+
+  const std::vector<int32_t> &inputs = op.inputs;
+  const std::vector<int32_t> &outputs = op.outputs;
+  const auto &tensors = context->reader()->tensors();
+  const auto &opcodes = context->reader()->opcodes();
+  auto tensors_ptr = context->reader()->tensors_ptr();
+  assert(tensors_ptr != nullptr);
+
+  std::vector<CircleNode *> input_nodes;
+  for (const int32_t input_tensor_index : inputs)
+  {
+    input_nodes.push_back(context->nodefinder()->node(input_tensor_index));
+  }
+
+  // Create CircleNonMaxSuppressionV5 and attach its six operands in operator input order
+  auto node = graph->nodes()->create<CircleNonMaxSuppressionV5>();
+  node->boxes(input_nodes[0]);
+  node->scores(input_nodes[1]);
+  node->max_output_size(input_nodes[2]);
+  node->iou_threshold(input_nodes[3]);
+  node->score_threshold(input_nodes[4]);
+  node->soft_nms_sigma(input_nodes[5]);
+
+  assert(outputs.size() == 3);
+  {
+    // The node itself takes the name of output tensor 0 and the version of its opcode
+    const circle::TensorT &output_tensor = *tensors[outputs[0]];
+    node->name(tensor_name(output_tensor));
+    node->op_version(opcodes[op.opcode_index].get()->version);
+
+    // NOTE We don't set quantization for NonMaxSuppressionV5 itself but to virtual outputs
+  }
+
+  // Create one virtual output node per output tensor and enroll it under that tensor index
+  for (size_t n = 0; n < outputs.size(); ++n)
+  {
+    const circle::TensorT &output_tensor = *tensors[outputs[n]];
+
+    auto *nodeout = graph->nodes()->create<CircleNonMaxSuppressionV5Out>();
+    copy_tensor_attributes(output_tensor, nodeout);
+
+    // mark shape_status: NOSHAPE when tensors_ptr has no shape for this output, else VALID
+    if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
+      nodeout->shape_status(ShapeStatus::NOSHAPE);
+    else
+      nodeout->shape_status(ShapeStatus::VALID);
+
+    nodeout->input(node);
+    nodeout->index(n);
+
+    context->nodefinder()->enroll(outputs[n], nodeout);
+  }
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CirclePadV2.cpp b/compiler/luci/import/src/Nodes/CirclePadV2.cpp
new file mode 100644
index 000000000..493876e68
--- /dev/null
+++ b/compiler/luci/import/src/Nodes/CirclePadV2.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CirclePadV2.h"
+
+#include <luci/IR/Nodes/CirclePadV2.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CirclePadV2GraphBuilder::validate(const ValidateArgs &args) const
+{
+  if (args.op.inputs.size() != 3)
+    return false;
+
+  if (args.op.outputs.size() != 1)
+    return false;
+
+  return true;
+}
+
+CircleNode *CirclePadV2GraphBuilder::build_node(const circle::OperatorT &op,
+                                                const std::vector<CircleNode *> &inputs,
+                                                loco::Graph *graph) const
+{
+  auto *node = graph->nodes()->create<CirclePadV2>();
+  node->input(inputs[0]);
+  node->paddings(inputs[1]);
+  node->constant_values(inputs[2]);
+
+  const auto *options = op.builtin_options.AsPadV2Options();
+  (void)options; // There are no options.
+
+  return node;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/Nodes/CircleReduceMax.cpp b/compiler/luci/import/src/Nodes/CircleReduceMax.cpp
index 05492dbc6..e633abf7d 100644
--- a/compiler/luci/import/src/Nodes/CircleReduceMax.cpp
+++ b/compiler/luci/import/src/Nodes/CircleReduceMax.cpp
@@ -18,33 +18,14 @@
 
 #include <luci/IR/Nodes/CircleReduceMax.h>
 
+#include "ValidateHelpers.h"
+
 namespace luci
 {
 
 bool CircleReduceMaxGraphBuilder::validate(const ValidateArgs &args) const
 {
-  const auto &inputs = args.op.inputs;
-  const auto &outputs = args.op.outputs;
-
-  if (inputs.size() != 2)
-    return false;
-
-  if (outputs.size() != 1)
-    return false;
-
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor_axis = tensors.at(inputs.at(1));
-
-  switch (tensor_axis->type)
-  {
-    case circle::TensorType_INT32:
-    case circle::TensorType_INT64:
-      break;
-    default:
-      return false;
-  }
-
-  return true;
+  return validate_reduce_minmax(args);
 }
 
 CircleNode *CircleReduceMaxGraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CircleReduceMin.cpp b/compiler/luci/import/src/Nodes/CircleReduceMin.cpp
index 117d5295a..bfc3001f8 100644
--- a/compiler/luci/import/src/Nodes/CircleReduceMin.cpp
+++ b/compiler/luci/import/src/Nodes/CircleReduceMin.cpp
@@ -18,33 +18,14 @@
 
 #include <luci/IR/Nodes/CircleReduceMin.h>
 
+#include "ValidateHelpers.h"
+
 namespace luci
 {
 
 bool CircleReduceMinGraphBuilder::validate(const ValidateArgs &args) const
 {
-  const auto &inputs = args.op.inputs;
-  const auto &outputs = args.op.outputs;
-
-  if (inputs.size() != 2)
-    return false;
-
-  if (outputs.size() != 1)
-    return false;
-
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor_axis = tensors.at(inputs.at(1));
-
-  switch (tensor_axis->type)
-  {
-    case circle::TensorType_INT32:
-    case circle::TensorType_INT64:
-      break;
-    default:
-      return false;
-  }
-
-  return true;
+  return validate_reduce_minmax(args);
 }
 
 CircleNode *CircleReduceMinGraphBuilder::build_node(const circle::OperatorT &op,
diff --git a/compiler/luci/import/src/Nodes/CircleSpaceToBatchND.cpp b/compiler/luci/import/src/Nodes/CircleSpaceToBatchND.cpp
index c1d508e3e..fbf9f6b12 100644
--- a/compiler/luci/import/src/Nodes/CircleSpaceToBatchND.cpp
+++ b/compiler/luci/import/src/Nodes/CircleSpaceToBatchND.cpp
@@ -18,49 +18,16 @@
 
 #include <luci/IR/Nodes/CircleSpaceToBatchND.h>
 
-#include <loco.h>
+#include "ValidateHelpers.h"
 
-#include <cassert>
+#include <loco.h>
 
 namespace luci
 {
 
 bool CircleSpaceToBatchNDGraphBuilder::validate(const ValidateArgs &args) const
 {
-  const auto &inputs = args.op.inputs;
-  if (inputs.size() != 3)
-    return false;
-
-  // input 1 and 2 should have INT32/INT64 type
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor_1 = tensors.at(inputs.at(1));
-  switch (tensor_1->type)
-  {
-    case circle::TensorType_INT32:
-    case circle::TensorType_INT64:
-      break;
-    default:
-      return false;
-  }
-  const auto &tensor_2 = tensors.at(inputs.at(2));
-  switch (tensor_2->type)
-  {
-    case circle::TensorType_INT32:
-    case circle::TensorType_INT64:
-      break;
-    default:
-      return false;
-  }
-
-  // Only support input shape dimension 3 and 4 only
-  const auto &tensor_0 = tensors.at(inputs.at(0));
-  const auto t_0_s = tensor_0->shape.size();
-  if (t_0_s != 3 && t_0_s != 4)
-    return false;
-
-  // TODO check input shape
-
-  return true;
+  return validate_batch_space_nd(args);
 }
 
 CircleNode *CircleSpaceToBatchNDGraphBuilder::build_node(const circle::OperatorT &,
diff --git a/compiler/luci/import/src/Nodes/CircleSparseToDense.cpp b/compiler/luci/import/src/Nodes/CircleSparseToDense.cpp
index 26d575e90..ac756b1f3 100644
--- a/compiler/luci/import/src/Nodes/CircleSparseToDense.cpp
+++ b/compiler/luci/import/src/Nodes/CircleSparseToDense.cpp
@@ -42,7 +42,8 @@ CircleNode *CircleSparseToDenseGraphBuilder::build_node(const circle::OperatorT
   node->default_value(inputs.at(3));
 
   const auto *options = op.builtin_options.AsSparseToDenseOptions();
-  node->validate_indices(options->validate_indices);
+  if (options)
+    node->validate_indices(options->validate_indices);
 
   return node;
 }
diff --git a/compiler/luci/import/src/Nodes/CircleTransposeConv.cpp b/compiler/luci/import/src/Nodes/CircleTransposeConv.cpp
index ddb196657..c280faaf5 100644
--- a/compiler/luci/import/src/Nodes/CircleTransposeConv.cpp
+++ b/compiler/luci/import/src/Nodes/CircleTransposeConv.cpp
@@ -27,7 +27,7 @@ namespace luci
 
 bool CircleTransposeConvGraphBuilder::validate(const ValidateArgs &args) const
 {
-  if (args.op.inputs.size() != 3)
+  if (args.op.inputs.size() != 3 && args.op.inputs.size() != 4)
     return false;
 
   const auto &inputs = args.op.inputs;
@@ -60,6 +60,17 @@ CircleNode *CircleTransposeConvGraphBuilder::build_node(const circle::OperatorT
   node->inputSizes(inputs.at(0));
   node->filter(inputs.at(1));
   node->outBackprop(inputs.at(2));
+  if (inputs.size() == 3)
+    node->bias(graph->nodes()->create<CircleOutputExclude>());
+  else
+    node->bias(inputs.at(3));
+
+  if (auto bias = dynamic_cast<luci::CircleOutputExclude *>(node->bias()))
+  {
+    // CircleOutputExclude doesn't need a type, but since all nodes must have a type, a dummy type
+    // is inserted.
+    bias->dtype(loco::DataType::FLOAT32);
+  }
 
   const auto *options = op.builtin_options.AsTransposeConvOptions();
   node->padding(luci_padding(options->padding));
diff --git a/compiler/luci/import/src/ValidateHelpers.cpp b/compiler/luci/import/src/ValidateHelpers.cpp
new file mode 100644
index 000000000..12a6548d6
--- /dev/null
+++ b/compiler/luci/import/src/ValidateHelpers.cpp
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ValidateHelpers.h"
+
+namespace luci
+{
+
+bool validate_batch_space_nd(const GraphBuilderBase::ValidateArgs &args)
+{
+  const auto &inputs = args.op.inputs;
+  if (inputs.size() != 3)
+    return false;
+
+  // input 1 and 2 should have INT32/INT64 type
+  const auto &tensors = args.reader.tensors();
+  const auto &tensor_1 = tensors.at(inputs.at(1));
+  switch (tensor_1->type)
+  {
+    case circle::TensorType_INT32:
+    case circle::TensorType_INT64:
+      break;
+    default:
+      return false;
+  }
+  const auto &tensor_2 = tensors.at(inputs.at(2));
+  switch (tensor_2->type)
+  {
+    case circle::TensorType_INT32:
+    case circle::TensorType_INT64:
+      break;
+    default:
+      return false;
+  }
+
+  // Only input tensors of rank 3 or 4 are supported
+  const auto &tensor_0 = tensors.at(inputs.at(0));
+  const auto t_0_s = tensor_0->shape.size();
+  if (t_0_s != 3 && t_0_s != 4)
+    return false;
+
+  // TODO check input shape
+
+  return true;
+}
+
+bool validate_minmax(const GraphBuilderBase::ValidateArgs &args)
+{
+  const auto &inputs = args.op.inputs;
+  const auto &outputs = args.op.outputs;
+
+  if (inputs.size() != 2)
+    return false;
+
+  if (outputs.size() != 1)
+    return false;
+
+  const auto &tensors = args.reader.tensors();
+  const auto &tensor = tensors.at(inputs.at(0));
+
+  switch (tensor->type)
+  {
+    case circle::TensorType_FLOAT16:
+    case circle::TensorType_FLOAT32:
+    case circle::TensorType_FLOAT64:
+    case circle::TensorType_INT32:
+    case circle::TensorType_INT64:
+      break;
+    default:
+      return false;
+  }
+
+  if (tensors[inputs.at(1)]->type != tensor->type)
+    return false;
+
+  if (tensors[outputs[0]]->type != tensor->type)
+    return false;
+
+  return true;
+}
+
+bool validate_reduce_minmax(const GraphBuilderBase::ValidateArgs &args)
+{
+  const auto &inputs = args.op.inputs;
+  const auto &outputs = args.op.outputs;
+
+  if (inputs.size() != 2)
+    return false;
+
+  if (outputs.size() != 1)
+    return false;
+
+  const auto &tensors = args.reader.tensors();
+  const auto &tensor_axis = tensors.at(inputs.at(1));
+
+  switch (tensor_axis->type)
+  {
+    case circle::TensorType_INT32:
+    case circle::TensorType_INT64:
+      break;
+    default:
+      return false;
+  }
+
+  return true;
+}
+
+} // namespace luci
diff --git a/compiler/luci/import/src/ValidateHelpers.h b/compiler/luci/import/src/ValidateHelpers.h
new file mode 100644
index 000000000..4047b2f08
--- /dev/null
+++ b/compiler/luci/import/src/ValidateHelpers.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_VALIDATE_HELPERS_H__
+#define __LUCI_VALIDATE_HELPERS_H__
+
+#include "luci/Import/GraphBuilderBase.h"
+
+/**
+ * @note Functions in this file are helpers that reduce duplicated validation code
+ */
+
+namespace luci
+{
+
+bool validate_batch_space_nd(const GraphBuilderBase::ValidateArgs &args);
+bool validate_minmax(const GraphBuilderBase::ValidateArgs &args);
+bool validate_reduce_minmax(const GraphBuilderBase::ValidateArgs &args);
+
+} // namespace luci
+
+#endif // __LUCI_VALIDATE_HELPERS_H__
diff --git a/compiler/luci/lang/include/luci/IR/CircleNodes.h b/compiler/luci/lang/include/luci/IR/CircleNodes.h
index e57f5bb3e..25b86d2e9 100644
--- a/compiler/luci/lang/include/luci/IR/CircleNodes.h
+++ b/compiler/luci/lang/include/luci/IR/CircleNodes.h
@@ -71,6 +71,7 @@
 #include "Nodes/CircleMul.h"
 #include "Nodes/CircleNeg.h"
 #include "Nodes/CircleNonMaxSuppressionV4.h"
+#include "Nodes/CircleNonMaxSuppressionV5.h"
 #include "Nodes/CircleNotEqual.h"
 #include "Nodes/CircleOneHot.h"
 #include "Nodes/CirclePack.h"
@@ -134,6 +135,7 @@
 #include "Nodes/CircleCustomOut.h"
 #include "Nodes/CircleIfOut.h"
 #include "Nodes/CircleNonMaxSuppressionV4Out.h"
+#include "Nodes/CircleNonMaxSuppressionV5Out.h"
 #include "Nodes/CircleUnpackOut.h"
 #include "Nodes/CircleUniqueOut.h"
 #include "Nodes/CircleSplitOut.h"
diff --git a/compiler/luci/lang/include/luci/IR/CircleNodes.lst b/compiler/luci/lang/include/luci/IR/CircleNodes.lst
index 801051848..9f0a1b16e 100644
--- a/compiler/luci/lang/include/luci/IR/CircleNodes.lst
+++ b/compiler/luci/lang/include/luci/IR/CircleNodes.lst
@@ -64,6 +64,7 @@ CIRCLE_NODE(MIRROR_PAD, luci::CircleMirrorPad)
 CIRCLE_NODE(MUL, luci::CircleMul)
 CIRCLE_NODE(NEG, luci::CircleNeg)
 CIRCLE_NODE(NON_MAX_SUPPRESSION_V4, luci::CircleNonMaxSuppressionV4)
+CIRCLE_NODE(NON_MAX_SUPPRESSION_V5, luci::CircleNonMaxSuppressionV5)
 CIRCLE_NODE(NOT_EQUAL, luci::CircleNotEqual)
 CIRCLE_NODE(ONE_HOT, luci::CircleOneHot)
 CIRCLE_NODE(PACK, luci::CirclePack)
@@ -130,6 +131,7 @@ CIRCLE_NODE(CIRCLEOUTPUTEXCLUDE, luci::CircleOutputExclude)
 CIRCLE_NODE(CIRCLECUSTOMOUT, luci::CircleCustomOut)
 CIRCLE_NODE(CIRCLEIFOUT, luci::CircleIfOut)
 CIRCLE_NODE(CIRCLENONMAXSUPPRESSIONV4OUT, luci::CircleNonMaxSuppressionV4Out)
+CIRCLE_NODE(CIRCLENONMAXSUPPRESSIONV5OUT, luci::CircleNonMaxSuppressionV5Out)
 CIRCLE_NODE(CIRCLESPLITOUT, luci::CircleSplitOut)
 CIRCLE_NODE(CIRCLESPLITVOUT, luci::CircleSplitVOut)
 CIRCLE_NODE(CIRCLETOPKV2OUT, luci::CircleTopKV2Out)
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5.h
new file mode 100644
index 000000000..52d682147
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_NON_MAX_SUPPRESSION_V5_H__
+#define __LUCI_IR_CIRCLE_NON_MAX_SUPPRESSION_V5_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief NON_MAX_SUPPRESSION_V5 in Circle
+ */
+class CircleNonMaxSuppressionV5 final
+    : public FixedArityNode<6, CircleNodeImpl<CircleOpcode::NON_MAX_SUPPRESSION_V5>>
+{
+public:
+  loco::Node *boxes(void) const { return at(0)->node(); }
+  void boxes(loco::Node *node) { at(0)->node(node); }
+
+  loco::Node *scores(void) const { return at(1)->node(); }
+  void scores(loco::Node *node) { at(1)->node(node); }
+
+  loco::Node *max_output_size(void) const { return at(2)->node(); }
+  void max_output_size(loco::Node *node) { at(2)->node(node); }
+
+  loco::Node *iou_threshold(void) const { return at(3)->node(); }
+  void iou_threshold(loco::Node *node) { at(3)->node(node); }
+
+  loco::Node *score_threshold(void) const { return at(4)->node(); }
+  void score_threshold(loco::Node *node) { at(4)->node(node); }
+
+  loco::Node *soft_nms_sigma(void) const { return at(5)->node(); }
+  void soft_nms_sigma(loco::Node *node) { at(5)->node(node); }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_NON_MAX_SUPPRESSION_V5_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5Out.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5Out.h
new file mode 100644
index 000000000..0c6989cc7
--- /dev/null
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleNonMaxSuppressionV5Out.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_NONMAXSUPPRESSIONV5OUT_H__
+#define __LUCI_IR_CIRCLE_NONMAXSUPPRESSIONV5OUT_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief Virtual NONMAXSUPPRESSIONV5OUT in Circle
+ */
+class CircleNonMaxSuppressionV5Out final
+    : public FixedArityNode<1, CircleNodeImpl<CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT>>
+{
+public:
+  CircleNonMaxSuppressionV5Out() = default;
+
+public:
+  loco::Node *input(void) const { return at(0)->node(); }
+  void input(loco::Node *node) { at(0)->node(node); }
+
+public:
+  int32_t index(void) const { return _index; }
+  void index(int32_t index) { _index = index; }
+
+private:
+  int32_t _index{-1};
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_NONMAXSUPPRESSIONV5OUT_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleSparseToDense.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleSparseToDense.h
index 9f5051317..7e80304b0 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleSparseToDense.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleSparseToDense.h
@@ -49,7 +49,7 @@ public:
   void validate_indices(bool validate_indices) { _validate_indices = validate_indices; }
 
 private:
-  bool _validate_indices{true};
+  bool _validate_indices{false};
 };
 
 } // namespace luci
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleTransposeConv.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleTransposeConv.h
index fc638d49f..e355102d6 100644
--- a/compiler/luci/lang/include/luci/IR/Nodes/CircleTransposeConv.h
+++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleTransposeConv.h
@@ -34,7 +34,8 @@ namespace luci
  *        'out' acutally means 'out' and 'in' of the this node.
  */
 class CircleTransposeConv final
-    : public FixedArityNode<3, CircleNodeImpl<CircleOpcode::TRANSPOSE_CONV>>
+    : public FixedArityNode<4, CircleNodeImpl<CircleOpcode::TRANSPOSE_CONV>>,
+      public LuciNodeMixin<LuciNodeTrait::Bias>
 {
 public:
   loco::Node *inputSizes(void) const { return at(0)->node(); }
@@ -46,6 +47,21 @@ public:
   loco::Node *outBackprop(void) const { return at(2)->node(); }
   void outBackprop(Node *node) { at(2)->node(node); }
 
+  /**
+   * @note  "bias" is optional. When this node has no conceptual bias, "bias()"
+   *        is expected to be of `luci::CircleOutputExclude` type.
+   *
+   * <Comment on tflite TRANSPOSE_CONV>
+   *
+   * (The Circle node does not depend on tflite; this is only background for conversion.)
+   * Before TF v2.3.0, tflite TRANSPOSE_CONV didn't support fused bias as argument.
+   * From TF v2.3.0, tflite TRANSPOSE_CONV supports bias as optional 4th argument.
+   *
+   * Ref: https://github.com/tensorflow/tensorflow/commit/43b8f6e710
+   */
+  loco::Node *bias(void) const override { return at(3)->node(); }
+  void bias(loco::Node *node) override { at(3)->node(node); }
+
 public:
   const Padding &padding(void) const { return _padding; }
   void padding(const Padding &padding) { _padding = padding; }
diff --git a/compiler/luci/lang/src/Nodes/CircleConst.cpp b/compiler/luci/lang/src/Nodes/CircleConst.cpp
index 17ff853eb..0d02d32dc 100644
--- a/compiler/luci/lang/src/Nodes/CircleConst.cpp
+++ b/compiler/luci/lang/src/Nodes/CircleConst.cpp
@@ -73,6 +73,7 @@ template <loco::DataType DT> typename loco::DataTypeImpl<DT>::Type &CircleConst:
 INSTANTIATE(loco::DataType::S64);
 INSTANTIATE(loco::DataType::S32);
 INSTANTIATE(loco::DataType::S16);
+INSTANTIATE(loco::DataType::S8);
 INSTANTIATE(loco::DataType::FLOAT32);
 INSTANTIATE(loco::DataType::U8);
 INSTANTIATE(loco::DataType::BOOL);
diff --git a/compiler/luci/lang/src/Nodes/CircleNonMaxSuppressionV5.test.cpp b/compiler/luci/lang/src/Nodes/CircleNonMaxSuppressionV5.test.cpp
new file mode 100644
index 000000000..ceb74e3df
--- /dev/null
+++ b/compiler/luci/lang/src/Nodes/CircleNonMaxSuppressionV5.test.cpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleNonMaxSuppressionV5.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleNonMaxSuppressionV5Test, constructor)
+{
+  luci::CircleNonMaxSuppressionV5 nmsv5_node;
+
+  ASSERT_EQ(luci::CircleDialect::get(), nmsv5_node.dialect());
+  ASSERT_EQ(luci::CircleOpcode::NON_MAX_SUPPRESSION_V5, nmsv5_node.opcode());
+
+  ASSERT_EQ(nullptr, nmsv5_node.boxes());
+  ASSERT_EQ(nullptr, nmsv5_node.scores());
+  ASSERT_EQ(nullptr, nmsv5_node.max_output_size());
+  ASSERT_EQ(nullptr, nmsv5_node.iou_threshold());
+  ASSERT_EQ(nullptr, nmsv5_node.score_threshold());
+  ASSERT_EQ(nullptr, nmsv5_node.soft_nms_sigma());
+}
+
+TEST(CircleNonMaxSuppressionV5Test, input_NEG)
+{
+  luci::CircleNonMaxSuppressionV5 nmsv5_node;
+  luci::CircleNonMaxSuppressionV5 node;
+
+  nmsv5_node.boxes(&node);
+  nmsv5_node.scores(&node);
+  nmsv5_node.max_output_size(&node);
+  nmsv5_node.iou_threshold(&node);
+  nmsv5_node.score_threshold(&node);
+  nmsv5_node.soft_nms_sigma(&node);
+  ASSERT_NE(nullptr, nmsv5_node.boxes());
+  ASSERT_NE(nullptr, nmsv5_node.scores());
+  ASSERT_NE(nullptr, nmsv5_node.max_output_size());
+  ASSERT_NE(nullptr, nmsv5_node.iou_threshold());
+  ASSERT_NE(nullptr, nmsv5_node.score_threshold());
+  ASSERT_NE(nullptr, nmsv5_node.soft_nms_sigma());
+
+  nmsv5_node.boxes(nullptr);
+  nmsv5_node.scores(nullptr);
+  nmsv5_node.max_output_size(nullptr);
+  nmsv5_node.iou_threshold(nullptr);
+  nmsv5_node.score_threshold(nullptr);
+  nmsv5_node.soft_nms_sigma(nullptr);
+  ASSERT_EQ(nullptr, nmsv5_node.boxes());
+  ASSERT_EQ(nullptr, nmsv5_node.scores());
+  ASSERT_EQ(nullptr, nmsv5_node.max_output_size());
+  ASSERT_EQ(nullptr, nmsv5_node.iou_threshold());
+  ASSERT_EQ(nullptr, nmsv5_node.score_threshold());
+  ASSERT_EQ(nullptr, nmsv5_node.soft_nms_sigma());
+}
+
+TEST(CircleNonMaxSuppressionV5Test, arity_NEG)
+{
+  luci::CircleNonMaxSuppressionV5 nmsv5_node;
+
+  ASSERT_NO_THROW(nmsv5_node.arg(5));
+  ASSERT_THROW(nmsv5_node.arg(6), std::out_of_range);
+}
+
+TEST(CircleNonMaxSuppressionV5Test, visit_mutable_NEG)
+{
+  struct TestVisitor final : public luci::CircleNodeMutableVisitor<void>
+  {
+  };
+
+  luci::CircleNonMaxSuppressionV5 nmsv5_node;
+
+  TestVisitor tv;
+  ASSERT_THROW(nmsv5_node.accept(&tv), std::exception);
+}
+
+TEST(CircleNonMaxSuppressionV5Test, visit_NEG)
+{
+  struct TestVisitor final : public luci::CircleNodeVisitor<void>
+  {
+  };
+
+  luci::CircleNonMaxSuppressionV5 nmsv5_node;
+
+  TestVisitor tv;
+  ASSERT_THROW(nmsv5_node.accept(&tv), std::exception);
+}
diff --git a/compiler/circle2circle/src/CircleExpContract.cpp b/compiler/luci/lang/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp
index b56b7eedc..7b427ea03 100644
--- a/compiler/circle2circle/src/CircleExpContract.cpp
+++ b/compiler/luci/lang/src/Nodes/CircleNonMaxSuppressionV5Out.test.cpp
@@ -14,20 +14,19 @@
  * limitations under the License.
  */
 
-#include "CircleExpContract.h"
+#include "luci/IR/Nodes/CircleNonMaxSuppressionV5Out.h"
 
-#include <oops/InternalExn.h>
+#include "luci/IR/CircleDialect.h"
 
-#include <fstream>
-#include <iostream>
+#include <gtest/gtest.h>
 
-bool CircleExpContract::store(const char *ptr, const size_t size) const
+TEST(CircleNonMaxSuppressionV5OutTest, constructor)
 {
-  if (!ptr)
-    INTERNAL_EXN("Graph was not serialized by FlatBuffer for some reason");
+  luci::CircleNonMaxSuppressionV5Out vout_node;
 
-  std::ofstream fs(_filepath.c_str(), std::ofstream::binary);
-  fs.write(ptr, size);
+  ASSERT_EQ(luci::CircleDialect::get(), vout_node.dialect());
+  ASSERT_EQ(luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT, vout_node.opcode());
 
-  return fs.good();
+  ASSERT_EQ(nullptr, vout_node.input());
+  ASSERT_EQ(-1, vout_node.index());
 }
diff --git a/compiler/luci/lang/src/Nodes/CircleSparseToDense.test.cpp b/compiler/luci/lang/src/Nodes/CircleSparseToDense.test.cpp
index de3cf6e9a..03f612ba7 100644
--- a/compiler/luci/lang/src/Nodes/CircleSparseToDense.test.cpp
+++ b/compiler/luci/lang/src/Nodes/CircleSparseToDense.test.cpp
@@ -33,7 +33,7 @@ TEST(CircleSparseToDenseTest, constructor)
   ASSERT_EQ(nullptr, stb_node.values());
   ASSERT_EQ(nullptr, stb_node.default_value());
 
-  ASSERT_EQ(true, stb_node.validate_indices());
+  ASSERT_EQ(false, stb_node.validate_indices());
 }
 
 TEST(CircleSparseToDenseTest, input_NEG)
diff --git a/compiler/luci/lang/src/Nodes/CircleTransposeConv.test.cpp b/compiler/luci/lang/src/Nodes/CircleTransposeConv.test.cpp
index 429169744..3e0db803f 100644
--- a/compiler/luci/lang/src/Nodes/CircleTransposeConv.test.cpp
+++ b/compiler/luci/lang/src/Nodes/CircleTransposeConv.test.cpp
@@ -69,8 +69,8 @@ TEST(CircleTransposeConvTest, arity_NEG)
 {
   luci::CircleTransposeConv trc_node;
 
-  ASSERT_NO_THROW(trc_node.arg(2));
-  ASSERT_THROW(trc_node.arg(3), std::out_of_range);
+  ASSERT_NO_THROW(trc_node.arg(3));
+  ASSERT_THROW(trc_node.arg(4), std::out_of_range);
 }
 
 TEST(CircleTransposeConvTest, visit_mutable_NEG)
diff --git a/compiler/luci/logex/src/FormattedGraph.cpp b/compiler/luci/logex/src/FormattedGraph.cpp
index f04a418ef..bb7c73d5f 100644
--- a/compiler/luci/logex/src/FormattedGraph.cpp
+++ b/compiler/luci/logex/src/FormattedGraph.cpp
@@ -245,10 +245,12 @@ private:
   IMPLEMENT(luci::CircleMul)
   IMPLEMENT(luci::CircleNeg)
   IMPLEMENT(luci::CircleNonMaxSuppressionV4)
+  IMPLEMENT(luci::CircleNonMaxSuppressionV5)
   IMPLEMENT(luci::CircleNotEqual)
   IMPLEMENT(luci::CircleOneHot)
   IMPLEMENT(luci::CirclePack)
   IMPLEMENT(luci::CirclePad)
+  IMPLEMENT(luci::CirclePadV2)
   IMPLEMENT(luci::CirclePow)
   IMPLEMENT(luci::CirclePRelu)
   IMPLEMENT(luci::CircleRange)
@@ -306,6 +308,7 @@ private:
   IMPLEMENT(luci::CircleOutput)
   IMPLEMENT(luci::CircleIfOut)
   IMPLEMENT(luci::CircleNonMaxSuppressionV4Out)
+  IMPLEMENT(luci::CircleNonMaxSuppressionV5Out)
   IMPLEMENT(luci::CircleSplitOut)
   IMPLEMENT(luci::CircleSplitVOut)
   IMPLEMENT(luci::CircleTopKV2Out)
@@ -380,192 +383,848 @@ bool use_ido(const locop::SymbolTable *tbl, const CIRCLENODE *node, locop::NodeS
   return true;
 }
 
-bool CircleNodeSummaryBuilderBase::build(const loco::Node *node, locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleAddN *node,
+                  locop::NodeSummary &s)
 {
-  if (node->dialect() != luci::CircleDialect::get())
-    return false;
+  for (uint32_t i = 0; i < node->arity(); ++i)
+    s.args().append("inputs", tbl->lookup(node->inputs(i)));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
 
-#define CIRCLE_NODE(OPCODE, CLASS)                        \
-  if (dynamic_cast<const CLASS *>(node))                  \
-  {                                                       \
-    s.opname(circle_opname(node->opnum()));               \
-    return summary(dynamic_cast<const CLASS *>(node), s); \
-  }
-#include <luci/IR/CircleNodes.lst>
-#undef CIRCLE_NODE
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleAveragePool2D *node,
+                  locop::NodeSummary &s)
+{
+  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
 
-  return false;
+  s.args().append("value", tbl->lookup(node->value()));
+  s.args().append("filter(h,w)", to_str(node->filter()));
+  s.args().append("stride(h,w)", to_str(node->stride()));
+  s.args().append("padding", to_str(node->padding()));
+  s.args().append("fused", to_str(node->fusedActivationFunction()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
 }
 
-bool CircleNodeSummaryBuilder::summary(const luci::CircleAbs *node, locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBatchMatMul *node,
+                  locop::NodeSummary &s)
 {
-  return use_x(tbl(), node, s);
+  s.args().append("x", tbl->lookup(node->x()));
+  s.args().append("y", tbl->lookup(node->y()));
+  s.args().append("adj_x", to_str(node->adj_x()));
+  s.args().append("adj_y", to_str(node->adj_y()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
 }
 
-bool CircleNodeSummaryBuilder::summary(const luci::CircleAdd *node, locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBatchToSpaceND *node,
+                  locop::NodeSummary &s)
 {
-  return use_xy_act(tbl(), node, s);
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("block_shape", tbl->lookup(node->block_shape()));
+  s.args().append("crops", tbl->lookup(node->crops()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
 }
 
-bool CircleNodeSummaryBuilder::summary(const luci::CircleAddN *node, locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleCast *node,
+                  locop::NodeSummary &s)
 {
-  for (uint32_t i = 0; i < node->arity(); ++i)
-    s.args().append("inputs", tbl()->lookup(node->inputs(i)));
+  s.args().append("x", tbl->lookup(node->x()));
+  s.args().append("in_data_type", to_str(node->in_data_type()));
+  s.args().append("out_data_type", to_str(node->out_data_type()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
 
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleConcatenation *node,
+                  locop::NodeSummary &s)
+{
+  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
+
+  for (uint32_t i = 0; i < node->numValues(); ++i)
+    s.args().append("values", tbl->lookup(node->values(i)));
+  s.args().append("axis", pepper::str(node->axis()));
+  s.args().append("fused", to_str(node->fusedActivationFunction()));
   s.state(locop::NodeSummary::State::Complete);
   return true;
 }
 
-bool CircleNodeSummaryBuilder::summary(const luci::CircleArgMax *node, locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleConv2D *node,
+                  locop::NodeSummary &s)
 {
-  return use_ido(tbl(), node, s);
+  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
+  assert(node->padding() != luci::Padding::UNDEFINED);
+
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("filter", tbl->lookup(node->filter()));
+  s.args().append("bias", tbl->lookup(node->bias()));
+  s.args().append("stride(h,w)", to_str(node->stride()));
+  s.args().append("dilation(h,w)", to_str(node->dilation()));
+  s.args().append("padding", to_str(node->padding()));
+  s.args().append("fused", to_str(node->fusedActivationFunction()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
 }
 
-bool CircleNodeSummaryBuilder::summary(const luci::CircleArgMin *node, locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleCustom *node,
+                  locop::NodeSummary &s)
 {
-  return use_ido(tbl(), node, s);
+  for (uint32_t i = 0; i < node->numInputs(); i++)
+  {
+    s.args().append("input" + std::to_string(i), tbl->lookup(node->inputs(i)));
+  }
+  s.args().append("custom_code", node->custom_code());
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
 }
 
-bool CircleNodeSummaryBuilder::summary(const luci::CircleAveragePool2D *node,
-                                       locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleDepthToSpace *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("block_size", std::to_string(node->block_size()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleDepthwiseConv2D *node,
+                  locop::NodeSummary &s)
 {
   assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
+  assert(node->padding() != luci::Padding::UNDEFINED);
 
-  s.args().append("value", tbl()->lookup(node->value()));
-  s.args().append("filter(h,w)", to_str(node->filter()));
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("filter", tbl->lookup(node->filter()));
+  s.args().append("bias", tbl->lookup(node->bias()));
   s.args().append("stride(h,w)", to_str(node->stride()));
+  s.args().append("dilation(h,w)", to_str(node->dilation()));
   s.args().append("padding", to_str(node->padding()));
+  s.args().append("depthMultiplier", std::to_string(node->depthMultiplier()));
   s.args().append("fused", to_str(node->fusedActivationFunction()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
 
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleExpandDims *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("axis", tbl->lookup(node->axis()));
   s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
 
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleFill *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("dims", tbl->lookup(node->dims()));
+  s.args().append("value", tbl->lookup(node->value()));
+  s.state(locop::NodeSummary::State::Complete);
   return true;
 }
 
-bool CircleNodeSummaryBuilder::summary(const luci::CircleBatchMatMul *node,
-                                       locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleFullyConnected *node,
+                  locop::NodeSummary &s)
 {
-  s.args().append("x", tbl()->lookup(node->x()));
-  s.args().append("y", tbl()->lookup(node->y()));
-  s.args().append("adj_x", to_str(node->adj_x()));
-  s.args().append("adj_y", to_str(node->adj_y()));
+  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
+
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("weights", tbl->lookup(node->weights()));
+  s.args().append("bias", tbl->lookup(node->bias()));
+  s.args().append("fused", to_str(node->fusedActivationFunction()));
   s.state(locop::NodeSummary::State::Complete);
   return true;
 }
 
-bool CircleNodeSummaryBuilder::summary(const luci::CircleBatchToSpaceND *node,
-                                       locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleGather *node,
+                  locop::NodeSummary &s)
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("block_shape", tbl()->lookup(node->block_shape()));
-  s.args().append("crops", tbl()->lookup(node->crops()));
+  s.args().append("params", tbl->lookup(node->params()));
+  s.args().append("indices", tbl->lookup(node->indices()));
+  s.args().append("axis", pepper::str(node->axis()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
 
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleGatherNd *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("params", tbl->lookup(node->params()));
+  s.args().append("indices", tbl->lookup(node->indices()));
   s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleIf *node, locop::NodeSummary &s)
+{
+  s.args().append("cond", tbl->lookup(node->cond()));
+  for (uint32_t i = 0; i < node->input_count(); ++i)
+    s.args().append("input", tbl->lookup(node->input(i)));
 
+  if (node->then_graph() != nullptr)
+    s.args().append("then_graph", node->then_graph()->name());
+  else
+    s.args().append("then_branch", pepper::str(node->then_branch()));
+
+  if (node->else_graph() != nullptr)
+    s.args().append("else_graph", node->else_graph()->name());
+  else
+    s.args().append("else_branch", pepper::str(node->else_branch()));
+
+  s.state(locop::NodeSummary::State::Complete);
   return true;
 }
 
-bool CircleNodeSummaryBuilder::summary(const luci::CircleCast *node, locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleL2Normalize *node,
+                  locop::NodeSummary &s)
 {
-  s.args().append("x", tbl()->lookup(node->x()));
-  s.args().append("in_data_type", to_str(node->in_data_type()));
-  s.args().append("out_data_type", to_str(node->out_data_type()));
+  s.args().append("x", tbl->lookup(node->x()));
+  s.args().append("fused_activation_function", to_str(node->fusedActivationFunction()));
   s.state(locop::NodeSummary::State::Complete);
   return true;
 }
 
-bool CircleNodeSummaryBuilder::summary(const luci::CircleCeil *node, locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleLeakyRelu *node,
+                  locop::NodeSummary &s)
 {
-  return use_x(tbl(), node, s);
+  s.args().append("features", tbl->lookup(node->features()));
+  s.args().append("alpha", std::to_string(node->alpha()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
 }
 
-bool CircleNodeSummaryBuilder::summary(const luci::CircleConcatenation *node,
-                                       locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleLocalResponseNormalization *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("radius", pepper::str(node->radius()));
+  s.args().append("bias", pepper::str(node->bias()));
+  s.args().append("alpha", pepper::str(node->alpha()));
+  s.args().append("beta", pepper::str(node->beta()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleLogSoftmax *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("logits", tbl->lookup(node->logits()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMatrixDiag *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("diagonal", tbl->lookup(node->diagonal()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMatrixSetDiag *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("diagonal", tbl->lookup(node->diagonal()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMaxPool2D *node,
+                  locop::NodeSummary &s)
 {
   assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
 
-  for (uint32_t i = 0; i < node->numValues(); ++i)
-    s.args().append("values", tbl()->lookup(node->values(i)));
-  s.args().append("axis", pepper::str(node->axis()));
+  s.args().append("value", tbl->lookup(node->value()));
+  s.args().append("filter(h,w)", to_str(node->filter()));
+  s.args().append("stride(h,w)", to_str(node->stride()));
+  s.args().append("padding", to_str(node->padding()));
   s.args().append("fused", to_str(node->fusedActivationFunction()));
   s.state(locop::NodeSummary::State::Complete);
   return true;
 }
 
-bool CircleNodeSummaryBuilder::summary(const luci::CircleConst *, locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMirrorPad *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("paddings", tbl->lookup(node->paddings()));
+  s.args().append("mode", to_str(node->mode()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleNonMaxSuppressionV4 *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("boxes", tbl->lookup(node->boxes()));
+  s.args().append("scores", tbl->lookup(node->scores()));
+  s.args().append("max_output_size", tbl->lookup(node->max_output_size()));
+  s.args().append("iou_threshold", tbl->lookup(node->iou_threshold()));
+  s.args().append("score_threshold", tbl->lookup(node->score_threshold()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleNonMaxSuppressionV5 *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("boxes", tbl->lookup(node->boxes()));
+  s.args().append("scores", tbl->lookup(node->scores()));
+  s.args().append("max_output_size", tbl->lookup(node->max_output_size()));
+  s.args().append("iou_threshold", tbl->lookup(node->iou_threshold()));
+  s.args().append("score_threshold", tbl->lookup(node->score_threshold()));
+  s.args().append("soft_nms_sigma", tbl->lookup(node->soft_nms_sigma()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleOneHot *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("indices", tbl->lookup(node->indices()));
+  s.args().append("depth", tbl->lookup(node->depth()));
+  s.args().append("on_value", tbl->lookup(node->on_value()));
+  s.args().append("off_value", tbl->lookup(node->off_value()));
+  s.args().append("axis", pepper::str(node->axis()));
+
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePack *node,
+                  locop::NodeSummary &s)
+{
+  for (uint32_t i = 0; i < node->values_count(); ++i)
+    s.args().append("values", tbl->lookup(node->values(i)));
+  s.args().append("values_count", pepper::str(node->values_count()));
+  s.args().append("axis", pepper::str(node->axis()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePad *node, locop::NodeSummary &s)
 {
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("paddings", tbl->lookup(node->paddings()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePadV2 *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("paddings", tbl->lookup(node->paddings()));
+  s.args().append("constant_values", tbl->lookup(node->constant_values()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePRelu *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("alpha", tbl->lookup(node->alpha()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleRange *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("start", tbl->lookup(node->start()));
+  s.args().append("limit", tbl->lookup(node->limit()));
+  s.args().append("delta", tbl->lookup(node->delta()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleReshape *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("tensor", tbl->lookup(node->tensor()));
+  s.args().append("shape", tbl->lookup(node->shape()));
+  // TODO Show newShape info
   s.state(locop::NodeSummary::State::PartiallyKnown);
   return true;
 }
 
-bool CircleNodeSummaryBuilder::summary(const luci::CircleConv2D *node, locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleResizeBilinear *node,
+                  locop::NodeSummary &s)
 {
-  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-  assert(node->padding() != luci::Padding::UNDEFINED);
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("size", tbl->lookup(node->size()));
+  s.args().append("align_corners", node->align_corners() ? "true" : "false");
+  s.args().append("half_pixel_centers", node->half_pixel_centers() ? "true" : "false");
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
 
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("filter", tbl()->lookup(node->filter()));
-  s.args().append("bias", tbl()->lookup(node->bias()));
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleResizeNearestNeighbor *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("size", tbl->lookup(node->size()));
+  s.args().append("align_corners", node->align_corners() ? "true" : "false");
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
 
-  s.args().append("stride(h,w)", to_str(node->stride()));
-  s.args().append("dilation(h,w)", to_str(node->dilation()));
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleReverseSequence *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("seq_lengths", tbl->lookup(node->seq_lengths()));
+  s.args().append("seq_axis", std::to_string(node->seq_axis()));
+  s.args().append("batch_axis", std::to_string(node->batch_axis()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
 
-  s.args().append("padding", to_str(node->padding()));
-  s.args().append("fused", to_str(node->fusedActivationFunction()));
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleReverseV2 *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("tensor", tbl->lookup(node->tensor()));
+  s.args().append("axis", tbl->lookup(node->axis()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
 
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleScatterNd *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("indices", tbl->lookup(node->indices()));
+  s.args().append("updates", tbl->lookup(node->updates()));
+  s.args().append("shape", tbl->lookup(node->shape()));
   s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
 
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSegmentSum *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("segment_ids", tbl->lookup(node->segment_ids()));
+  s.state(locop::NodeSummary::State::Complete);
   return true;
 }
 
-bool CircleNodeSummaryBuilder::summary(const luci::CircleCos *node, locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSelect *node,
+                  locop::NodeSummary &s)
 {
-  return use_x(tbl(), node, s);
+  s.args().append("condition", tbl->lookup(node->condition()));
+  s.args().append("t", tbl->lookup(node->t()));
+  s.args().append("e", tbl->lookup(node->e()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
 }
 
-bool CircleNodeSummaryBuilder::summary(const luci::CircleCustom *node, locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSelectV2 *node,
+                  locop::NodeSummary &s)
 {
-  for (uint32_t i = 0; i < node->numInputs(); i++)
+  s.args().append("condition", tbl->lookup(node->condition()));
+  s.args().append("t", tbl->lookup(node->t()));
+  s.args().append("e", tbl->lookup(node->e()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleShape *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("out_type", to_str(node->out_type()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSlice *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("begin", tbl->lookup(node->begin()));
+  s.args().append("size", tbl->lookup(node->size()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSoftmax *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("logits", tbl->lookup(node->logits()));
+  s.args().append("beta", pepper::str(node->beta()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSpaceToBatchND *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("block_shape", tbl->lookup(node->block_shape()));
+  s.args().append("paddings", tbl->lookup(node->paddings()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSpaceToDepth *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("block_size", pepper::str(node->block_size()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSparseToDense *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("indices", tbl->lookup(node->indices()));
+  s.args().append("output_shape", tbl->lookup(node->output_shape()));
+  s.args().append("values", tbl->lookup(node->values()));
+  s.args().append("default_value", tbl->lookup(node->default_value()));
+  s.args().append("Validate_indices", pepper::str(node->validate_indices()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSplit *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("split_dim", tbl->lookup(node->split_dim()));
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("num_split", pepper::str(node->num_split()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSplitV *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("size_splits", tbl->lookup(node->size_splits()));
+  s.args().append("split_dim", tbl->lookup(node->split_dim()));
+  s.args().append("num_split", pepper::str(node->num_split()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSqueeze *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+
+  std::stringstream ss{"("};
+  for (size_t i = 0; i < node->squeeze_dims().size(); ++i)
   {
-    s.args().append("input" + std::to_string(i), tbl()->lookup(node->inputs(i)));
+    if (i != 0)
+      ss << ", ";
+    ss << node->squeeze_dims()[i];
   }
-  s.args().append("custom_code", node->custom_code());
+  ss << ")";
+  s.args().append("squeeze_dims", ss.str());
   s.state(locop::NodeSummary::State::Complete);
   return true;
 }
 
-bool CircleNodeSummaryBuilder::summary(const luci::CircleDepthToSpace *node,
-                                       locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleStridedSlice *node,
+                  locop::NodeSummary &s)
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("block_size", std::to_string(node->block_size()));
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("begin", tbl->lookup(node->begin()));
+  s.args().append("end", tbl->lookup(node->end()));
+  s.args().append("strides", tbl->lookup(node->strides()));
+  s.args().append("begin_mask", pepper::str(node->begin_mask()));
+  s.args().append("end_mask", pepper::str(node->end_mask()));
+  s.args().append("ellipsis_mask", pepper::str(node->ellipsis_mask()));
+  s.args().append("new_axis_mask", pepper::str(node->new_axis_mask()));
+  s.args().append("shrink_axis_mask", pepper::str(node->shrink_axis_mask()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTile *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("multiples", tbl->lookup(node->multiples()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
 
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTopKV2 *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("k", tbl->lookup(node->k()));
   s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
 
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTranspose *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("a", tbl->lookup(node->a()));
+  s.args().append("perm", tbl->lookup(node->perm()));
+  s.state(locop::NodeSummary::State::Complete);
   return true;
 }
 
-bool CircleNodeSummaryBuilder::summary(const luci::CircleDepthwiseConv2D *node,
-                                       locop::NodeSummary &s) const
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTransposeConv *node,
+                  locop::NodeSummary &s)
 {
-  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
   assert(node->padding() != luci::Padding::UNDEFINED);
 
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("filter", tbl()->lookup(node->filter()));
-  s.args().append("bias", tbl()->lookup(node->bias()));
-
+  s.args().append("inputSizes", tbl->lookup(node->inputSizes()));
+  s.args().append("filter", tbl->lookup(node->filter()));
+  s.args().append("outBackprop", tbl->lookup(node->outBackprop()));
   s.args().append("stride(h,w)", to_str(node->stride()));
-  s.args().append("dilation(h,w)", to_str(node->dilation()));
   s.args().append("padding", to_str(node->padding()));
-  s.args().append("depthMultiplier", std::to_string(node->depthMultiplier()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUnique *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("idx_out_type", to_str(node->idx_out_type()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUnpack *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("value", tbl->lookup(node->value()));
+  s.args().append("num", pepper::str(node->num()));
+  s.args().append("axis", pepper::str(node->axis()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleWhere *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("condition", tbl->lookup(node->condition()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleWhile *node,
+                  locop::NodeSummary &s)
+{
+  for (uint32_t i = 0; i < node->input_count(); ++i)
+    s.args().append("input", tbl->lookup(node->input(i)));
+
+  if (node->cond_graph() != nullptr)
+    s.args().append("cond_graph", node->cond_graph()->name());
+  else
+    s.args().append("cond_branch", pepper::str(node->cond_branch()));
+
+  if (node->body_graph() != nullptr)
+    s.args().append("body_graph", node->body_graph()->name());
+  else
+    s.args().append("body_branch", pepper::str(node->body_branch()));
+
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTopKV2Out *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("topkv2", tbl->lookup(node->input()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUniqueOut *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("unique", tbl->lookup(node->input()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUnpackOut *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("unpack", tbl->lookup(node->input()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleWhileOut *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("while", tbl->lookup(node->input()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleOutput *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("from", tbl->lookup(node->from()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBCQFullyConnected *node,
+                  locop::NodeSummary &s)
+{
+  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("weights_scales", tbl->lookup(node->weights_scales()));
+  s.args().append("weights_binary", tbl->lookup(node->weights_binary()));
+  s.args().append("bias", tbl->lookup(node->bias()));
+  s.args().append("weights_clusters", tbl->lookup(node->weights_clusters()));
   s.args().append("fused", to_str(node->fusedActivationFunction()));
+  s.args().append("weights_hidden_size", pepper::str(node->weights_hidden_size()));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
 
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBCQGather *node,
+                  locop::NodeSummary &s)
+{
+  s.args().append("input_scales", tbl->lookup(node->input_scales()));
+  s.args().append("input_binary", tbl->lookup(node->input_binary()));
+  s.args().append("indices", tbl->lookup(node->indices()));
+  s.args().append("input_clusters", tbl->lookup(node->input_clusters()));
+  s.args().append("axis", pepper::str(node->axis()));
+  s.args().append("input_hidden_size", pepper::str(node->input_hidden_size()));
   s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool summary_node(const locop::SymbolTable *tbl, const luci::CircleInstanceNorm *node,
+                  locop::NodeSummary &s)
+{
+  auto fused = node->fusedActivationFunction();
+  assert(fused != luci::FusedActFunc::UNDEFINED);
+
+  s.args().append("input", tbl->lookup(node->input()));
+  s.args().append("gamma", tbl->lookup(node->gamma()));
+  s.args().append("beta", tbl->lookup(node->beta()));
+  s.args().append("epsilon", pepper::str(node->epsilon()));
+  s.args().append("fused_activation_function", to_str(fused));
+  s.state(locop::NodeSummary::State::Complete);
+  return true;
+}
+
+bool CircleNodeSummaryBuilderBase::build(const loco::Node *node, locop::NodeSummary &s) const
+{
+  if (node->dialect() != luci::CircleDialect::get())
+    return false;
+
+#define CIRCLE_NODE(OPCODE, CLASS)                        \
+  if (dynamic_cast<const CLASS *>(node))                  \
+  {                                                       \
+    s.opname(circle_opname(node->opnum()));               \
+    return summary(dynamic_cast<const CLASS *>(node), s); \
+  }
+#include <luci/IR/CircleNodes.lst>
+#undef CIRCLE_NODE
+
+  return false;
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleAbs *node, locop::NodeSummary &s) const
+{
+  return use_x(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleAdd *node, locop::NodeSummary &s) const
+{
+  return use_xy_act(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleAddN *node, locop::NodeSummary &s) const
+{
+  return summary_node(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleArgMax *node, locop::NodeSummary &s) const
+{
+  return use_ido(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleArgMin *node, locop::NodeSummary &s) const
+{
+  return use_ido(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleAveragePool2D *node,
+                                       locop::NodeSummary &s) const
+{
+  return summary_node(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleBatchMatMul *node,
+                                       locop::NodeSummary &s) const
+{
+  return summary_node(tbl(), node, s);
+}
 
+bool CircleNodeSummaryBuilder::summary(const luci::CircleBatchToSpaceND *node,
+                                       locop::NodeSummary &s) const
+{
+  return summary_node(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleCast *node, locop::NodeSummary &s) const
+{
+  return summary_node(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleCeil *node, locop::NodeSummary &s) const
+{
+  return use_x(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleConcatenation *node,
+                                       locop::NodeSummary &s) const
+{
+  return summary_node(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleConst *, locop::NodeSummary &s) const
+{
+  s.state(locop::NodeSummary::State::PartiallyKnown);
   return true;
 }
 
+bool CircleNodeSummaryBuilder::summary(const luci::CircleConv2D *node, locop::NodeSummary &s) const
+{
+  return summary_node(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleCos *node, locop::NodeSummary &s) const
+{
+  return use_x(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleCustom *node, locop::NodeSummary &s) const
+{
+  return summary_node(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleDepthToSpace *node,
+                                       locop::NodeSummary &s) const
+{
+  return summary_node(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CircleDepthwiseConv2D *node,
+                                       locop::NodeSummary &s) const
+{
+  return summary_node(tbl(), node, s);
+}
+
 bool CircleNodeSummaryBuilder::summary(const luci::CircleDiv *node, locop::NodeSummary &s) const
 {
   return use_xy(tbl(), node, s);
@@ -584,10 +1243,7 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleExp *node, locop::NodeS
 bool CircleNodeSummaryBuilder::summary(const luci::CircleExpandDims *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("axis", tbl()->lookup(node->axis()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleFloor *node, locop::NodeSummary &s) const
@@ -609,44 +1265,24 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleFloorMod *node,
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleFill *node, locop::NodeSummary &s) const
 {
-  s.args().append("dims", tbl()->lookup(node->dims()));
-  s.args().append("value", tbl()->lookup(node->value()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleFullyConnected *node,
                                        locop::NodeSummary &s) const
 {
-  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("weights", tbl()->lookup(node->weights()));
-  s.args().append("bias", tbl()->lookup(node->bias()));
-  s.args().append("fused", to_str(node->fusedActivationFunction()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleGather *node, locop::NodeSummary &s) const
 {
-  s.args().append("params", tbl()->lookup(node->params()));
-  s.args().append("indices", tbl()->lookup(node->indices()));
-  s.args().append("axis", pepper::str(node->axis()));
-
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleGatherNd *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("params", tbl()->lookup(node->params()));
-  s.args().append("indices", tbl()->lookup(node->indices()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleGreater *node, locop::NodeSummary &s) const
@@ -662,32 +1298,13 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleGreaterEqual *node,
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleIf *node, locop::NodeSummary &s) const
 {
-  s.args().append("cond", tbl()->lookup(node->cond()));
-  for (uint32_t i = 0; i < node->input_count(); ++i)
-    s.args().append("input", tbl()->lookup(node->input(i)));
-
-  if (node->then_graph() != nullptr)
-    s.args().append("then_graph", node->then_graph()->name());
-  else
-    s.args().append("then_branch", pepper::str(node->then_branch()));
-
-  if (node->else_graph() != nullptr)
-    s.args().append("else_graph", node->else_graph()->name());
-  else
-    s.args().append("else_branch", pepper::str(node->else_branch()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleL2Normalize *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("x", tbl()->lookup(node->x()));
-  s.args().append("fused_activation_function", to_str(node->fusedActivationFunction()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleLess *node, locop::NodeSummary &s) const
@@ -704,22 +1321,13 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleLessEqual *node,
 bool CircleNodeSummaryBuilder::summary(const luci::CircleLeakyRelu *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("features", tbl()->lookup(node->features()));
-  s.args().append("alpha", std::to_string(node->alpha()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleLocalResponseNormalization *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("radius", pepper::str(node->radius()));
-  s.args().append("bias", pepper::str(node->bias()));
-  s.args().append("alpha", pepper::str(node->alpha()));
-  s.args().append("beta", pepper::str(node->beta()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleLog *node, locop::NodeSummary &s) const
@@ -754,26 +1362,19 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleLogistic *node,
 bool CircleNodeSummaryBuilder::summary(const luci::CircleLogSoftmax *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("logits", tbl()->lookup(node->logits()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleMatrixDiag *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("diagonal", tbl()->lookup(node->diagonal()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleMatrixSetDiag *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("diagonal", tbl()->lookup(node->diagonal()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleMaximum *node, locop::NodeSummary &s) const
@@ -784,17 +1385,7 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleMaximum *node, locop::N
 bool CircleNodeSummaryBuilder::summary(const luci::CircleMaxPool2D *node,
                                        locop::NodeSummary &s) const
 {
-  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
-  s.args().append("value", tbl()->lookup(node->value()));
-  s.args().append("filter(h,w)", to_str(node->filter()));
-  s.args().append("stride(h,w)", to_str(node->stride()));
-  s.args().append("padding", to_str(node->padding()));
-  s.args().append("fused", to_str(node->fusedActivationFunction()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleMean *node, locop::NodeSummary &s) const
@@ -810,11 +1401,7 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleMinimum *node, locop::N
 bool CircleNodeSummaryBuilder::summary(const luci::CircleMirrorPad *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("paddings", tbl()->lookup(node->paddings()));
-  s.args().append("mode", to_str(node->mode()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleMul *node, locop::NodeSummary &s) const
@@ -830,14 +1417,13 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleNeg *node, locop::NodeS
 bool CircleNodeSummaryBuilder::summary(const luci::CircleNonMaxSuppressionV4 *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("boxes", pepper::str(node->boxes()));
-  s.args().append("scores", pepper::str(node->scores()));
-  s.args().append("max_output_size", pepper::str(node->max_output_size()));
-  s.args().append("iou_threshold", pepper::str(node->iou_threshold()));
-  s.args().append("score_threshold", pepper::str(node->score_threshold()));
+  return summary_node(tbl(), node, s);
+}
 
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+bool CircleNodeSummaryBuilder::summary(const luci::CircleNonMaxSuppressionV5 *node,
+                                       locop::NodeSummary &s) const
+{
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleNotEqual *node,
@@ -848,32 +1434,22 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleNotEqual *node,
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleOneHot *node, locop::NodeSummary &s) const
 {
-  s.args().append("indices", tbl()->lookup(node->indices()));
-  s.args().append("depth", tbl()->lookup(node->depth()));
-  s.args().append("on_value", tbl()->lookup(node->on_value()));
-  s.args().append("off_value", tbl()->lookup(node->off_value()));
-  s.args().append("axis", pepper::str(node->axis()));
-
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CirclePack *node, locop::NodeSummary &s) const
 {
-  for (uint32_t i = 0; i < node->values_count(); ++i)
-    s.args().append("values", tbl()->lookup(node->values(i)));
-  s.args().append("values_count", pepper::str(node->values_count()));
-  s.args().append("axis", pepper::str(node->axis()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CirclePad *node, locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("paddings", tbl()->lookup(node->paddings()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
+}
+
+bool CircleNodeSummaryBuilder::summary(const luci::CirclePadV2 *node, locop::NodeSummary &s) const
+{
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CirclePow *node, locop::NodeSummary &s) const
@@ -883,20 +1459,12 @@ bool CircleNodeSummaryBuilder::summary(const luci::CirclePow *node, locop::NodeS
 
 bool CircleNodeSummaryBuilder::summary(const luci::CirclePRelu *node, locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("alpha", tbl()->lookup(node->alpha()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleRange *node, locop::NodeSummary &s) const
 {
-  s.args().append("start", tbl()->lookup(node->start()));
-  s.args().append("limit", tbl()->lookup(node->limit()));
-  s.args().append("delta", tbl()->lookup(node->delta()));
-
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleRank *node, locop::NodeSummary &s) const
@@ -946,52 +1514,31 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleReluN1To1 *node,
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleReshape *node, locop::NodeSummary &s) const
 {
-  s.args().append("tensor", tbl()->lookup(node->tensor()));
-  s.args().append("shape", tbl()->lookup(node->shape()));
-  // TODO Show newShape info
-  s.state(locop::NodeSummary::State::PartiallyKnown);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleResizeBilinear *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("size", tbl()->lookup(node->size()));
-  s.args().append("align_corners", node->align_corners() ? "true" : "false");
-  s.args().append("half_pixel_centers", node->half_pixel_centers() ? "true" : "false");
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleResizeNearestNeighbor *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("size", tbl()->lookup(node->size()));
-  s.args().append("align_corners", node->align_corners() ? "true" : "false");
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleReverseSequence *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("seq_lengths", tbl()->lookup(node->seq_lengths()));
-  s.args().append("seq_axis", std::to_string(node->seq_axis()));
-  s.args().append("batch_axis", std::to_string(node->batch_axis()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleReverseV2 *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("tensor", tbl()->lookup(node->tensor()));
-  s.args().append("axis", tbl()->lookup(node->axis()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleRound *node, locop::NodeSummary &s) const
@@ -1007,47 +1554,29 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleRsqrt *node, locop::Nod
 bool CircleNodeSummaryBuilder::summary(const luci::CircleScatterNd *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("indices", tbl()->lookup(node->indices()));
-  s.args().append("updates", tbl()->lookup(node->updates()));
-  s.args().append("shape", tbl()->lookup(node->shape()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleSegmentSum *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("segment_ids", tbl()->lookup(node->segment_ids()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleSelect *node, locop::NodeSummary &s) const
 {
-  s.args().append("condition", tbl()->lookup(node->condition()));
-  s.args().append("t", tbl()->lookup(node->t()));
-  s.args().append("e", tbl()->lookup(node->e()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleSelectV2 *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("condition", tbl()->lookup(node->condition()));
-  s.args().append("t", tbl()->lookup(node->t()));
-  s.args().append("e", tbl()->lookup(node->e()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleShape *node, locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("out_type", to_str(node->out_type()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleSin *node, locop::NodeSummary &s) const
@@ -1057,82 +1586,40 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleSin *node, locop::NodeS
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleSlice *node, locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("begin", tbl()->lookup(node->begin()));
-  s.args().append("size", tbl()->lookup(node->size()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleSoftmax *node, locop::NodeSummary &s) const
 {
-  s.args().append("logits", tbl()->lookup(node->logits()));
-  s.args().append("beta", pepper::str(node->beta()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleSpaceToBatchND *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("block_shape", tbl()->lookup(node->block_shape()));
-  s.args().append("paddings", tbl()->lookup(node->paddings()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleSpaceToDepth *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("block_size", pepper::str(node->block_size()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleSparseToDense *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("indices", tbl()->lookup(node->indices()));
-  s.args().append("output_shape", tbl()->lookup(node->output_shape()));
-  s.args().append("values", tbl()->lookup(node->values()));
-  s.args().append("default_value", tbl()->lookup(node->default_value()));
-
-  s.args().append("Validate_indices", pepper::str(node->validate_indices()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleSplit *node, locop::NodeSummary &s) const
 {
-  s.args().append("split_dim", tbl()->lookup(node->split_dim()));
-  s.args().append("input", tbl()->lookup(node->input()));
-
-  s.args().append("num_split", pepper::str(node->num_split()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleSplitV *node, locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("size_splits", tbl()->lookup(node->size_splits()));
-  s.args().append("split_dim", tbl()->lookup(node->split_dim()));
-
-  s.args().append("num_split", pepper::str(node->num_split()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleSqrt *node, locop::NodeSummary &s) const
@@ -1153,38 +1640,13 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleSquaredDifference *node
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleSqueeze *node, locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-
-  std::stringstream ss{"("};
-  for (size_t i = 0; i < node->squeeze_dims().size(); ++i)
-  {
-    if (i != 0)
-      ss << ", ";
-    ss << node->squeeze_dims()[i];
-  }
-  ss << ")";
-
-  s.args().append("squeeze_dims", ss.str());
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleStridedSlice *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("begin", tbl()->lookup(node->begin()));
-  s.args().append("end", tbl()->lookup(node->end()));
-  s.args().append("strides", tbl()->lookup(node->strides()));
-
-  s.args().append("begin_mask", pepper::str(node->begin_mask()));
-  s.args().append("end_mask", pepper::str(node->end_mask()));
-  s.args().append("ellipsis_mask", pepper::str(node->ellipsis_mask()));
-  s.args().append("new_axis_mask", pepper::str(node->new_axis_mask()));
-  s.args().append("shrink_axis_mask", pepper::str(node->shrink_axis_mask()));
-
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleSub *node, locop::NodeSummary &s) const
@@ -1204,92 +1666,44 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleTanh *node, locop::Node
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleTile *node, locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("multiples", tbl()->lookup(node->multiples()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleTopKV2 *node, locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("k", tbl()->lookup(node->k()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleTranspose *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("a", tbl()->lookup(node->a()));
-  s.args().append("perm", tbl()->lookup(node->perm()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleTransposeConv *node,
                                        locop::NodeSummary &s) const
 {
-  assert(node->padding() != luci::Padding::UNDEFINED);
-
-  s.args().append("inputSizes", tbl()->lookup(node->inputSizes()));
-  s.args().append("filter", tbl()->lookup(node->filter()));
-  s.args().append("outBackprop", tbl()->lookup(node->outBackprop()));
-
-  s.args().append("stride(h,w)", to_str(node->stride()));
-  s.args().append("padding", to_str(node->padding()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleUnique *node, locop::NodeSummary &s) const
 {
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("idx_out_type", to_str(node->idx_out_type()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleUnpack *node, locop::NodeSummary &s) const
 {
-  s.args().append("value", tbl()->lookup(node->value()));
-
-  s.args().append("num", pepper::str(node->num()));
-  s.args().append("axis", pepper::str(node->axis()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleWhere *node, locop::NodeSummary &s) const
 {
-  s.args().append("condition", tbl()->lookup(node->condition()));
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleWhile *node, locop::NodeSummary &s) const
 {
-  for (uint32_t i = 0; i < node->input_count(); ++i)
-    s.args().append("input", tbl()->lookup(node->input(i)));
-
-  if (node->cond_graph() != nullptr)
-    s.args().append("cond_graph", node->cond_graph()->name());
-  else
-    s.args().append("cond_branch", pepper::str(node->cond_branch()));
-
-  if (node->body_graph() != nullptr)
-    s.args().append("body_graph", node->body_graph()->name());
-  else
-    s.args().append("body_branch", pepper::str(node->body_branch()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleZerosLike *node,
@@ -1313,29 +1727,19 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleSplitVOut *node,
 bool CircleNodeSummaryBuilder::summary(const luci::CircleTopKV2Out *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("topkv2", tbl()->lookup(node->input()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleUniqueOut *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("unique", tbl()->lookup(node->input()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleUnpackOut *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("unpack", tbl()->lookup(node->input()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleIfOut *node, locop::NodeSummary &s) const
@@ -1349,14 +1753,16 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleNonMaxSuppressionV4Out
   return use_input(tbl(), node, s);
 }
 
-bool CircleNodeSummaryBuilder::summary(const luci::CircleWhileOut *node,
+bool CircleNodeSummaryBuilder::summary(const luci::CircleNonMaxSuppressionV5Out *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("while", tbl()->lookup(node->input()));
-
-  s.state(locop::NodeSummary::State::Complete);
+  return use_input(tbl(), node, s);
+}
 
-  return true;
+bool CircleNodeSummaryBuilder::summary(const luci::CircleWhileOut *node,
+                                       locop::NodeSummary &s) const
+{
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleInput *, locop::NodeSummary &s) const
@@ -1367,61 +1773,25 @@ bool CircleNodeSummaryBuilder::summary(const luci::CircleInput *, locop::NodeSum
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleOutput *node, locop::NodeSummary &s) const
 {
-  s.args().append("from", tbl()->lookup(node->from()));
-
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleBCQFullyConnected *node,
                                        locop::NodeSummary &s) const
 {
-  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("weights_scales", tbl()->lookup(node->weights_scales()));
-  s.args().append("weights_binary", tbl()->lookup(node->weights_binary()));
-  s.args().append("bias", tbl()->lookup(node->bias()));
-  s.args().append("weights_clusters", tbl()->lookup(node->weights_clusters()));
-
-  s.args().append("fused", to_str(node->fusedActivationFunction()));
-  s.args().append("weights_hidden_size", pepper::str(node->weights_hidden_size()));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleBCQGather *node,
                                        locop::NodeSummary &s) const
 {
-  s.args().append("input_scales", tbl()->lookup(node->input_scales()));
-  s.args().append("input_binary", tbl()->lookup(node->input_binary()));
-  s.args().append("indices", tbl()->lookup(node->indices()));
-  s.args().append("input_clusters", tbl()->lookup(node->input_clusters()));
-
-  s.args().append("axis", pepper::str(node->axis()));
-  s.args().append("input_hidden_size", pepper::str(node->input_hidden_size()));
-
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 bool CircleNodeSummaryBuilder::summary(const luci::CircleInstanceNorm *node,
                                        locop::NodeSummary &s) const
 {
-  auto fused = node->fusedActivationFunction();
-  assert(fused != luci::FusedActFunc::UNDEFINED);
-
-  s.args().append("input", tbl()->lookup(node->input()));
-  s.args().append("gamma", tbl()->lookup(node->gamma()));
-  s.args().append("beta", tbl()->lookup(node->beta()));
-  s.args().append("epsilon", pepper::str(node->epsilon()));
-  s.args().append("fused_activation_function", to_str(fused));
-
-  s.state(locop::NodeSummary::State::Complete);
-
-  return true;
+  return summary_node(tbl(), node, s);
 }
 
 } // namespace
diff --git a/compiler/luci/pass/include/luci/CircleOptimizer.h b/compiler/luci/pass/include/luci/CircleOptimizer.h
index 312749f83..a832844f8 100644
--- a/compiler/luci/pass/include/luci/CircleOptimizer.h
+++ b/compiler/luci/pass/include/luci/CircleOptimizer.h
@@ -32,6 +32,7 @@ public:
   {
     enum Algorithm
     {
+      FuseBatchNormWithTConv,
       FuseBCQ,
       FuseInstanceNorm,
       ResolveCustomOpAdd,
@@ -39,6 +40,7 @@ public:
       ResolveCustomOpMatMul,
       QuantizeDequantizeWeights,
       QuantizeWithMinMax,
+      Requantize,
     };
 
     enum AlgorithmParameters
diff --git a/compiler/luci/pass/include/luci/Pass/FuseBatchNormWithTConv.h b/compiler/luci/pass/include/luci/Pass/FuseBatchNormWithTConv.h
new file mode 100644
index 000000000..d3e930a36
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/FuseBatchNormWithTConv.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FUSE_BATCH_NORM_WITH_TCONV_PASS_H__
+#define __LUCI_FUSE_BATCH_NORM_WITH_TCONV_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief  Pass to fuse Batch Normalization (a Mul followed by an Add) into CircleTransposeConv
+ */
+struct FuseBatchNormWithTConvPass final : public logo::Pass
+{
+  const char *name(void) const final { return "luci::FuseBatchNormWithTConvPass"; }
+
+  bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FUSE_BATCH_NORM_WITH_TCONV_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/RequantizePass.h b/compiler/luci/pass/include/luci/Pass/RequantizePass.h
new file mode 100644
index 000000000..2442b24ea
--- /dev/null
+++ b/compiler/luci/pass/include/luci/Pass/RequantizePass.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REQUANTIZE_PASS_H__
+#define __LUCI_REQUANTIZE_PASS_H__
+
+#include <loco.h>
+
+#include <logo/Pass.h>
+
+#include <luci/Pass/QuantizationParameters.h>
+
+namespace luci
+{
+
+/**
+ * @brief Pass to requantize an already-quantized graph from input_dtype to output_dtype
+ */
+class RequantizePass : public logo::Pass
+{
+public:
+  RequantizePass(loco::DataType input_dtype, loco::DataType output_dtype)
+      : _input_dtype{input_dtype}, _output_dtype{output_dtype}
+  {
+    // DO NOTHING
+  }
+  const char *name(void) const override { return "luci::RequantizePass"; }
+
+public:
+  bool run(loco::Graph *graph) override;
+
+private:
+  loco::DataType _input_dtype;  // dtype to requantize from
+  loco::DataType _output_dtype; // dtype to requantize to
+};
+
+} // namespace luci
+
+#endif //__LUCI_REQUANTIZE_PASS_H__
diff --git a/compiler/luci/pass/src/CircleOptimizer.cpp b/compiler/luci/pass/src/CircleOptimizer.cpp
index 2edf7a9c6..2ee759b4e 100644
--- a/compiler/luci/pass/src/CircleOptimizer.cpp
+++ b/compiler/luci/pass/src/CircleOptimizer.cpp
@@ -16,11 +16,13 @@
 
 #include "luci/CircleOptimizer.h"
 
+#include "luci/Pass/FuseBatchNormWithTConv.h"
 #include "luci/Pass/FuseBCQPass.h"
 #include "luci/Pass/FuseInstanceNormPass.h"
 #include "luci/Pass/ResolveCustomOpAddPass.h"
 #include "luci/Pass/ResolveCustomOpBatchMatMulPass.h"
 #include "luci/Pass/ResolveCustomOpMatMulPass.h"
+#include "luci/Pass/RequantizePass.h"
 #include "luci/Pass/QuantizeWithMinMaxPass.h"
 #include "luci/Pass/QuantizeDequantizeWeightsPass.h"
 // TODO add more passes
@@ -34,6 +36,7 @@
 #include "ProgressReporter.h"
 #include "CircleOptimizerUtils.h"
 
+#include <luci/IR/CircleNodes.h>
 #include <logo/Phase.h>
 
 #include <memory>
@@ -125,6 +128,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const
   {
     phase.emplace_back(std::make_unique<FuseBCQPass>());
   }
+  if (_options->query(Options::Algorithm::FuseBatchNormWithTConv))
+  {
+    phase.emplace_back(std::make_unique<FuseBatchNormWithTConvPass>());
+  }
 
   // Shape inference is needed for added nodes doing above transformations
   phase.emplace_back(std::make_unique<luci::ShapeInferencePass>());
@@ -163,6 +170,14 @@ void CircleOptimizer::quantize(loco::Graph *g) const
       throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
                                to_string(fakeq_supported_granularity));
 
+    // Clear existing quantparams before doing fake quantization
+    for (auto node : loco::active_nodes(loco::output_nodes(g)))
+    {
+      auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+      if (circle_node->quantparam() != nullptr)
+        circle_node->quantparam(nullptr);
+    }
+
     luci::QuantizeDequantizeWeightsPass fake_quantizer(
         str_to_dtype(input_dtype), str_to_dtype(output_dtype), str_to_granularity(granularity));
     fake_quantizer.run(g);
@@ -196,6 +211,27 @@ void CircleOptimizer::quantize(loco::Graph *g) const
     quantizer.run(g);
   }
 
+  // Requantize
+  if (_options->query(Options::Algorithm::Requantize))
+  {
+    static const std::vector<std::string> rq_supported_input_dtype{"int8"};
+    static const std::vector<std::string> rq_supported_output_dtype{"uint8"};
+
+    auto input_dtype = _options->param(Options::AlgorithmParameters::Quantize_input_dtype);
+    auto output_dtype = _options->param(Options::AlgorithmParameters::Quantize_output_dtype);
+
+    if (!in_array(to_lower_case(input_dtype), rq_supported_input_dtype))
+      throw std::runtime_error("Unsupported input type. List of supported input types: " +
+                               to_string(rq_supported_input_dtype));
+
+    if (!in_array(to_lower_case(output_dtype), rq_supported_output_dtype))
+      throw std::runtime_error("Unsupported output type. List of supported output types: " +
+                               to_string(rq_supported_output_dtype));
+
+    luci::RequantizePass requantizer(str_to_dtype(input_dtype), str_to_dtype(output_dtype));
+    requantizer.run(g);
+  }
+
   logo::Phase phase;
 
   // Do Shape/Type inference
diff --git a/compiler/luci/pass/src/FuseBCQPass.cpp b/compiler/luci/pass/src/FuseBCQPass.cpp
index 260de5b30..7aa2e3e80 100644
--- a/compiler/luci/pass/src/FuseBCQPass.cpp
+++ b/compiler/luci/pass/src/FuseBCQPass.cpp
@@ -38,9 +38,9 @@ const std::string node_name_prefix(luci::NodeName node_name)
 {
   std::string prefix = node_name;
 
-  if (prefix.find("ReadVariableOp/resource/") != std::string::npos)
+  if (prefix.find("/ReadVariableOp/resource") != std::string::npos)
   {
-    const auto start_index = prefix.find("ReadVariableOp/resource/");
+    const auto start_index = prefix.find("/ReadVariableOp/resource");
 
     const auto left_prefix = prefix.substr(0, start_index);
     const auto right_prefix = prefix.substr(start_index + 24);
diff --git a/compiler/luci/pass/src/FuseBatchNormWithTConv.cpp b/compiler/luci/pass/src/FuseBatchNormWithTConv.cpp
new file mode 100644
index 000000000..e39455b1a
--- /dev/null
+++ b/compiler/luci/pass/src/FuseBatchNormWithTConv.cpp
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseBatchNormWithTConv.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+/**
+ *  NOTE TF's fusedBatchNorm is converted to mul and add of Circle.
+ *
+ *  BEFORE
+ *
+ *         [CircleTransposeConv]
+ *                  |
+ *                [mul]
+ *                  |
+ *                [add]
+ *  AFTER
+ *
+ *         [CircleTransposeConv] (+ [CircleRelu6] when add had a fused RELU6)
+ *
+ *  Returns true if the graph was rewritten; nothing is modified when a check fails.
+ */
+bool fused_batch_norm_with_tconv(luci::CircleTransposeConv *tconv)
+{
+  // check whether it has bias or not. This optimization works only if it doesn't.
+  auto bias = dynamic_cast<luci::CircleOutputExclude *>(tconv->bias());
+  if (not bias)
+    return false;
+
+  // get weight of tconv
+  auto filter = dynamic_cast<luci::CircleConst *>(tconv->filter());
+  if (not filter)
+    return false;
+  if (filter->dtype() != loco::DataType::FLOAT32)
+    return false;
+
+  // get mul node (tconv must have exactly one consumer for the fusion to be safe)
+  auto tconv_output = loco::succs(tconv);
+  if (tconv_output.size() != 1)
+    return false;
+  auto mul = dynamic_cast<luci::CircleMul *>(*tconv_output.begin());
+  if (not mul)
+    return false;
+  if (mul->dtype() != loco::DataType::FLOAT32)
+    return false;
+
+  // get add node (mul must have exactly one consumer as well)
+  auto mul_output = loco::succs(mul);
+  if (mul_output.size() != 1)
+    return false;
+  auto add = dynamic_cast<luci::CircleAdd *>(*mul_output.begin());
+  if (not add)
+    return false;
+  if (add->dtype() != loco::DataType::FLOAT32)
+    return false;
+  if (add->fusedActivationFunction() != luci::FusedActFunc::NONE &&
+      add->fusedActivationFunction() != luci::FusedActFunc::RELU6)
+    return false;
+
+  // get scale of batchnorm (its elements are read below as FLOAT32)
+  auto scale = dynamic_cast<luci::CircleConst *>(mul->y());
+  if (not scale)
+    return false;
+  if (scale->dtype() != loco::DataType::FLOAT32)
+    return false;
+
+  // scale dim(0) == tconv filter channel dim
+  // NOTE(review) dim(3) is used as the channel dim; confirm this matches the filter layout
+  if (filter->rank() != 4)
+    return false;
+  auto filter_channel_dim = filter->dim(3).value();
+  if (scale->rank() != 1)
+    return false;
+  auto scale_dim = scale->dim(0).value();
+  if (filter_channel_dim != scale_dim)
+    return false;
+
+  // get shift of batchnorm (it becomes the bias of the FLOAT32 tconv)
+  auto shift = dynamic_cast<luci::CircleConst *>(add->y());
+  if (not shift)
+    return false;
+  if (shift->dtype() != loco::DataType::FLOAT32)
+    return false;
+
+  // shift dim(0) == tconv filter channel dim
+  if (shift->rank() != 1)
+    return false;
+  auto shift_dim = shift->dim(0).value();
+  if (filter_channel_dim != shift_dim)
+    return false;
+
+  // filter weight = filter weight * mul(scale); channel is the innermost index, so
+  // element (i * channels + c) is scaled by scale[c]
+  const loco::DataType F32 = loco::DataType::FLOAT32;
+  const uint32_t outer_size =
+      filter->dim(0).value() * filter->dim(1).value() * filter->dim(2).value();
+  for (uint32_t i = 0; i < outer_size; ++i)
+  {
+    for (uint32_t c = 0; c < filter_channel_dim; ++c)
+      filter->at<F32>(i * filter_channel_dim + c) *= scale->at<F32>(c);
+  }
+
+  // fuse shift with transposed conv
+  tconv->bias(shift);
+
+  if (add->fusedActivationFunction() == luci::FusedActFunc::RELU6)
+  {
+    // separate relu op from add op
+    auto relu = add->graph()->nodes()->create<luci::CircleRelu6>();
+    relu->features(tconv);
+
+    // consumers of add now read from relu
+    replace(add).with(relu);
+  }
+  else
+  {
+    replace(add).with(tconv);
+  }
+
+  return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+// Try the fusion on every active CircleTransposeConv; report whether any attempt succeeded
+bool FuseBatchNormWithTConvPass::run(loco::Graph *g)
+{
+  bool graph_updated = false;
+
+  for (auto active : loco::active_nodes(loco::output_nodes(g)))
+  {
+    auto transpose_conv = dynamic_cast<luci::CircleTransposeConv *>(active);
+    if (transpose_conv != nullptr && fused_batch_norm_with_tconv(transpose_conv))
+      graph_updated = true;
+  }
+
+  return graph_updated;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
index b335a53b4..60c1cdd72 100644
--- a/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
+++ b/compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
@@ -472,7 +472,12 @@ struct QuantizeWeights final : public luci::CircleNodeMutableVisitor<bool>
         if (granularity == QuantizationGranularity::ChannelWise)
         {
           auto quantparam = circle_node->quantparam();
-          assert(quantparam != nullptr);
+          if (quantparam == nullptr)
+          {
+            assert(false && "quantparam is nullptr");
+            return false;
+          }
+
           auto min = quantparam->min;
           auto scaling_factor = quantparam->scale;
           int32_t channel_dim_index = 0;
diff --git a/compiler/luci/pass/src/RequantizePass.cpp b/compiler/luci/pass/src/RequantizePass.cpp
new file mode 100644
index 000000000..49fbf76ec
--- /dev/null
+++ b/compiler/luci/pass/src/RequantizePass.cpp
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RequantizePass.h"
+#include "QuantizationUtils.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Log.h>
+
+#include <oops/UserExn.h>
+
+#include <iostream>
+#include <cmath>
+
+namespace luci
+{
+
+namespace
+{
+
+// Tell whether `node` is the bias input of its only consumer, where that consumer is a
+// Conv2D, DepthwiseConv2D, or FullyConnected layer
+bool is_bias(CircleConst *node)
+{
+  if (node == nullptr)
+    return false;
+
+  // assume bias is used by only one node
+  const auto consumers = loco::succs(node);
+  if (consumers.size() != 1)
+    return false;
+
+  auto consumer = *consumers.begin();
+
+  auto conv2d = dynamic_cast<CircleConv2D *>(consumer);
+  auto depthwise = dynamic_cast<CircleDepthwiseConv2D *>(consumer);
+  auto fully_connected = dynamic_cast<CircleFullyConnected *>(consumer);
+
+  // TODO: add TransposeConv when bias is supported in CircleTransposeConv
+  if (conv2d != nullptr && conv2d->bias() == node)
+    return true;
+  if (depthwise != nullptr && depthwise->bias() == node)
+    return true;
+  if (fully_connected != nullptr && fully_connected->bias() == node)
+    return true;
+
+  return false;
+}
+
+// Requantize a non-const tensor from int8 to uint8:
+// shift every zero point by 128 and set the node's dtype to U8
+void requant_nonconst_int8_to_uint8(CircleNode *circle_node)
+{
+  assert(circle_node->dtype() == loco::DataType::S8);
+
+  auto qparam = circle_node->quantparam();
+  assert(qparam != nullptr);
+  for (auto &zero_point : qparam->zerop)
+    zero_point += 128;
+  circle_node->dtype(loco::DataType::U8);
+}
+
+// Requantize CircleConst from symmetric int8 to asymmetric uint8
+//   stored values : -127 ~ 127  ->  1 ~ 255   (value + 128)
+//   zero points   : zp  ->  zp + 128
+// NOTE the shifted values are asserted to lie in 1 ~ 255
+void requant_const_int8_to_uint8(CircleConst *node)
+{
+  assert(node->dtype() == loco::DataType::S8);
+
+  // Read out all int8 elements first; they are written back after the dtype switch
+  const uint32_t count = node->size<loco::DataType::S8>();
+  std::vector<int32_t> shifted(count);
+  for (uint32_t idx = 0; idx < count; ++idx)
+    shifted[idx] = static_cast<int32_t>(node->at<loco::DataType::S8>(idx)) + 128;
+
+  // Switch the tensor to uint8 and store the shifted elements
+  node->dtype(loco::DataType::U8);
+  node->size<loco::DataType::U8>(count);
+  for (uint32_t idx = 0; idx < count; ++idx)
+  {
+    const int32_t value = shifted[idx];
+    assert(1 <= value && value <= 255);
+    node->at<loco::DataType::U8>(idx) = value;
+  }
+
+  // Shift every zero point by the same amount
+  auto qparam = node->quantparam();
+  assert(qparam != nullptr);
+  for (auto &zero_point : qparam->zerop)
+    zero_point += 128;
+}
+
+/**
+ * @brief RequantizeNonConst requantizes the non-const inputs (activations) of a node
+ */
+struct RequantizeNonConst final : public luci::CircleNodeMutableVisitor<bool>
+{
+  RequantizeNonConst(loco::DataType input, loco::DataType output)
+      : _input_type(input), _output_type(output)
+  {
+  }
+
+  // dtype to requantize from / dtype to requantize to
+  loco::DataType _input_type;
+  loco::DataType _output_type;
+
+  // Requantize the qualifying inputs of `node`; the return value is always false
+  bool visit(luci::CircleNode *node)
+  {
+    LOGGER(l);
+    INFO(l) << "RequantizeNonConst visit node: " << node->name() << std::endl;
+
+    const bool int8_to_uint8 =
+        _input_type == loco::DataType::S8 && _output_type == loco::DataType::U8;
+
+    const auto num_inputs = node->arity();
+    for (uint32_t idx = 0; idx < num_inputs; ++idx)
+    {
+      auto input = loco::must_cast<luci::CircleNode *>(node->arg(idx));
+
+      // Never quantized, or already in the output dtype: nothing to do
+      if (input->quantparam() == nullptr || input->dtype() == _output_type)
+        continue;
+
+      // Constants are skipped (only non-const is requantized in this function)
+      if (dynamic_cast<CircleConst *>(input) != nullptr)
+        continue;
+
+      if (int8_to_uint8)
+        requant_nonconst_int8_to_uint8(input);
+    }
+
+    return false;
+  }
+};
+
+/**
+ * @brief RequantizeConst requantizes the const inputs (weights, constants) of a node
+ */
+struct RequantizeConst final : public luci::CircleNodeMutableVisitor<bool>
+{
+  RequantizeConst(loco::DataType input, loco::DataType output)
+      : _input_type(input), _output_type(output)
+  {
+  }
+
+  // dtype to requantize from / dtype to requantize to
+  loco::DataType _input_type;
+  loco::DataType _output_type;
+
+  // Requantize the qualifying inputs of `node`; the return value is always false
+  bool visit(luci::CircleNode *node)
+  {
+    LOGGER(l);
+    INFO(l) << "RequantizeConst visit node: " << node->name() << std::endl;
+
+    const bool int8_to_uint8 =
+        _input_type == loco::DataType::S8 && _output_type == loco::DataType::U8;
+
+    const auto num_inputs = node->arity();
+    for (uint32_t idx = 0; idx < num_inputs; ++idx)
+    {
+      auto input = loco::must_cast<luci::CircleNode *>(node->arg(idx));
+
+      // Never quantized, or already in the output dtype: nothing to do
+      if (input->quantparam() == nullptr || input->dtype() == _output_type)
+        continue;
+
+      // Non-const inputs are skipped (only const is requantized in this function)
+      auto constant = dynamic_cast<CircleConst *>(input);
+      if (constant == nullptr)
+        continue;
+
+      // bias is not requantized when int8 -> uint8
+      if (is_bias(constant))
+        continue;
+
+      if (int8_to_uint8)
+        requant_const_int8_to_uint8(constant);
+    }
+
+    return false;
+  }
+};
+
+} // namespace
+
+// Requantize the whole graph: activations first, then constants, then the graph outputs.
+// Always returns false (one time run).
+bool RequantizePass::run(loco::Graph *g)
+{
+  LOGGER(l);
+  INFO(l) << "RequantizePass Start" << std::endl;
+
+  // Requantize non-const (activations)
+  RequantizeNonConst rqnc(_input_dtype, _output_dtype);
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+    loco::must_cast<luci::CircleNode *>(node)->accept(&rqnc);
+
+  // Requantize const (including weights, constants)
+  RequantizeConst rqc(_input_dtype, _output_dtype);
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+    loco::must_cast<luci::CircleNode *>(node)->accept(&rqc);
+
+  // Update output dtype
+  auto graph_outputs = g->outputs();
+  for (auto node : loco::output_nodes(g))
+  {
+    auto circle_output = loco::must_cast<luci::CircleOutput *>(node);
+
+    // checked cast, consistent with the other casts in this function, instead of an
+    // unchecked static_cast of the producer node
+    auto from = loco::must_cast<luci::CircleNode *>(circle_output->from());
+    if (from->dtype() != _output_dtype)
+      continue;
+
+    circle_output->dtype(_output_dtype);
+    auto graph_output = graph_outputs->at(circle_output->index());
+    graph_output->dtype(_output_dtype);
+  }
+
+  INFO(l) << "RequantizePass End" << std::endl;
+  return false; // one time run
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/CircleShapeInferenceRule.cpp b/compiler/luci/service/src/CircleShapeInferenceRule.cpp
index 6355ec546..db25186b1 100644
--- a/compiler/luci/service/src/CircleShapeInferenceRule.cpp
+++ b/compiler/luci/service/src/CircleShapeInferenceRule.cpp
@@ -47,49 +47,19 @@ std::ostream &operator<<(std::ostream &os, const loco::TensorShape &tensor_shape
   return os;
 }
 
-// Call this for CircleAvgPool2D and CircleMaxPool2D only
-template <class Pool2DType> loco::NodeShape infer_pool_2d_shape(const Pool2DType *node)
+loco::TensorShape own_shape(const luci::CircleNode *node)
 {
-  LUCI_ASSERT(loco::shape_known(node->value()), "Shape must be known");
-
-  auto ifm_shape = loco::shape_get(node->value()).template as<loco::TensorShape>();
-  assert(ifm_shape.rank() == 4);
-
-  uint32_t input_height = ifm_shape.dim(1).value();
-  uint32_t input_width = ifm_shape.dim(2).value();
-  uint32_t stride_height = node->stride()->h();
-  uint32_t stride_width = node->stride()->w();
-  uint32_t window_height = node->filter()->h();
-  uint32_t window_width = node->filter()->w();
-  uint32_t dilation_height = 1; // dilation for CircleAvgPool2D and CircleMaxPool2D is 1
-  uint32_t dilation_width = 1;
-  uint32_t effective_window_height = dilation_height * (window_height - 1) + 1;
-  uint32_t effective_window_width = dilation_width * (window_width - 1) + 1;
-
-  uint32_t output_height = 0;
-  uint32_t output_width = 0;
-
-  if (node->padding() == luci::Padding::VALID)
-  {
-    output_height = (input_height + stride_height - effective_window_height) / stride_height;
-    output_width = (input_width + stride_width - effective_window_width) / stride_width;
-  }
-  else if (node->padding() == luci::Padding::SAME)
-  {
-    output_height = (input_height + stride_height - 1) / stride_height;
-    output_width = (input_width + stride_width - 1) / stride_width;
-  }
-  else
-    LUCI_ASSERT(false, "Wrong padding type");
-
-  loco::TensorShape ofm_shape;
-  ofm_shape.rank(4);
-  ofm_shape.dim(0) = ifm_shape.dim(0);
-  ofm_shape.dim(1) = output_height;
-  ofm_shape.dim(2) = output_width;
-  ofm_shape.dim(3) = ifm_shape.dim(3);
+  loco::TensorShape shape;
+  shape.rank(node->rank());
+  for (uint32_t r = 0; r < node->rank(); ++r)
+    shape.dim(r) = loco::Dimension(node->dim(r).value());
+  return shape;
+}
 
-  return loco::NodeShape{ofm_shape};
+loco::NodeShape use_own(const luci::CircleNode *node)
+{
+  loco::TensorShape shape = own_shape(node);
+  return loco::NodeShape{shape};
 }
 
 /**
@@ -192,6 +162,304 @@ loco::TensorShape broadcast_shape(const loco::TensorShape &x, const loco::Tensor
   return output_shape;
 }
 
+/**
+ * @brief vector_from_constant will return int64_t vector from CircleConst node
+ */
+template <loco::DataType T> std::vector<int64_t> vector_from_constant(luci::CircleConst *const_node)
+{
+  const uint32_t count = const_node->size<T>();
+  std::vector<int64_t> result;
+  result.reserve(count); // one allocation instead of growing per push_back
+  for (uint32_t idx = 0; idx < count; ++idx)
+    result.push_back(const_node->at<T>(idx));
+  return result;
+}
+
+// Output shape is the broadcast of the shapes of node->x() and node->y()
+template <class CIRCLENODE> loco::NodeShape broadcast_xy(const CIRCLENODE *node)
+{
+  const auto lhs = loco::shape_get(node->x()).template as<loco::TensorShape>();
+  const auto rhs = loco::shape_get(node->y()).template as<loco::TensorShape>();
+
+  const loco::TensorShape broadcasted = broadcast_shape(lhs, rhs);
+  return loco::NodeShape{broadcasted};
+}
+
+template <class CIRCLENODE> loco::NodeShape use_x(const CIRCLENODE *node)
+{
+  // The output shape is the shape of node->x(), unchanged
+  return loco::NodeShape{loco::shape_get(node->x()).template as<loco::TensorShape>()};
+}
+
+template <class CIRCLENODE> loco::NodeShape use_logits(const CIRCLENODE *node)
+{
+  // The output shape is the shape of node->logits(), unchanged
+  return loco::NodeShape{loco::shape_get(node->logits()).template as<loco::TensorShape>()};
+}
+
+// Each output dimension is the input dimension plus the two values of its paddings row
+template <class CIRCLENODE>
+loco::NodeShape use_paddings(const CIRCLENODE *node, const luci::CircleConst *paddings)
+{
+  const loco::DataType S32 = loco::DataType::S32;
+
+  auto input_shape = loco::shape_get(node->input()).template as<loco::TensorShape>();
+
+  // TODO support other data type
+  LUCI_ASSERT(paddings->dtype() == S32, "Only support int 32 for now");
+  LUCI_ASSERT(paddings->rank() == 2, "paddings should be rank 2");
+
+  int32_t n = paddings->dim(0).value();
+  int32_t v = paddings->dim(1).value();
+
+  LUCI_ASSERT(v == 2, "paddings should be [n, 2]");
+  LUCI_ASSERT(n == int32_t(input_shape.rank()),
+              "paddings [n, 2] should have same value of input rank");
+
+  loco::TensorShape output_shape;
+  output_shape.rank(input_shape.rank());
+  for (int32_t ni = 0; ni < n; ++ni)
+  {
+    int32_t idx = ni * 2; // row ni of the [n, 2] paddings, stored row by row
+    int32_t value = input_shape.dim(ni).value();
+    value += paddings->at<S32>(idx + 0); // left
+    value += paddings->at<S32>(idx + 1); // right
+    output_shape.dim(ni) = value;
+  }
+
+  return loco::NodeShape{output_shape};
+}
+
+// Every input of ADD_N must have the shape of the first input, which is also the output shape
+loco::NodeShape infer_add_n(const luci::CircleAddN *node)
+{
+  const auto first_shape = loco::shape_get(node->inputs(0)).as<loco::TensorShape>();
+
+  for (uint32_t idx = 1; idx < node->arity(); ++idx)
+  {
+    const auto other_shape = loco::shape_get(node->inputs(idx)).as<loco::TensorShape>();
+    if (first_shape == other_shape)
+      continue;
+    INTERNAL_EXN_V("ADD_N shape not same as the first input: ", idx);
+  }
+  return loco::NodeShape{first_shape};
+}
+
+// Output shape: input shape without the selected axis (a negative axis counts from the end)
+loco::NodeShape infer_arg_max(const luci::CircleArgMax *node)
+{
+  LUCI_ASSERT(node->dimension(), "2nd input dimension() should not be nullptr");
+
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+
+  int64_t select_axis = 0;
+  {
+    // Only support node's shape() is CircleConst with S32/S64
+    // Support S32 for now.
+    auto const_shape_node = loco::must_cast<luci::CircleConst *>(node->dimension());
+    LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32,
+                "Only support int32 CircleConst for CircleArgMax");
+
+    if (const_shape_node->rank() > 1)
+      INTERNAL_EXN_V("Only support rank 0/1 CircleConst",
+                     oops::to_uint32(const_shape_node->rank()));
+
+    select_axis = const_shape_node->scalar<loco::DataType::S32>();
+  }
+  const uint32_t rank = input_shape.rank();
+  assert(rank > 0);
+  if (select_axis < 0)
+    select_axis += rank; // e.g. -1 selects the last dimension
+  assert(0 <= select_axis && select_axis < rank);
+
+  // NOTE select_axis is removed
+  loco::TensorShape shape_output;
+  const uint32_t shrink = static_cast<uint32_t>(select_axis);
+  shape_output.rank(rank - 1);
+  for (uint32_t r = 0, d = 0; r < rank; ++r)
+  {
+    if (r != shrink)
+      shape_output.dim(d++) = input_shape.dim(r);
+  }
+  return loco::NodeShape{shape_output};
+}
+
+// Output shape: input shape without the selected axis (a negative axis counts from the end)
+loco::NodeShape infer_arg_min(const luci::CircleArgMin *node)
+{
+  LUCI_ASSERT(node->dimension(), "2nd input dimension() should not be nullptr");
+
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+
+  int64_t select_axis = 0;
+  {
+    // Only support node's shape() is CircleConst with S32/S64
+    // Support S32 for now.
+    auto const_shape_node = loco::must_cast<luci::CircleConst *>(node->dimension());
+    LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32,
+                "Only support int32 CircleConst for CircleArgMin");
+
+    if (const_shape_node->rank() > 1)
+      INTERNAL_EXN_V("Only support rank 0/1 CircleConst",
+                     oops::to_uint32(const_shape_node->rank()));
+
+    select_axis = const_shape_node->scalar<loco::DataType::S32>();
+  }
+  const uint32_t rank = input_shape.rank();
+  assert(rank > 0);
+  if (select_axis < 0)
+    select_axis += rank; // e.g. -1 selects the last dimension
+  assert(0 <= select_axis && select_axis < rank);
+
+  // NOTE select_axis is removed
+  loco::TensorShape shape_output;
+  const uint32_t shrink = static_cast<uint32_t>(select_axis);
+  shape_output.rank(rank - 1);
+  for (uint32_t r = 0, d = 0; r < rank; ++r)
+  {
+    if (r != shrink)
+      shape_output.dim(d++) = input_shape.dim(r);
+  }
+  return loco::NodeShape{shape_output};
+}
+
+// Call this for CircleAvgPool2D and CircleMaxPool2D only
+template <class Pool2DType> loco::NodeShape infer_pool_2d_shape(const Pool2DType *node)
+{
+  LUCI_ASSERT(loco::shape_known(node->value()), "Shape must be known");
+
+  const auto ifm_shape = loco::shape_get(node->value()).template as<loco::TensorShape>();
+  assert(ifm_shape.rank() == 4);
+
+  const uint32_t in_h = ifm_shape.dim(1).value();
+  const uint32_t in_w = ifm_shape.dim(2).value();
+  const uint32_t stride_h = node->stride()->h();
+  const uint32_t stride_w = node->stride()->w();
+  const uint32_t window_h = node->filter()->h();
+  const uint32_t window_w = node->filter()->w();
+  // dilation for CircleAvgPool2D and CircleMaxPool2D is 1
+  const uint32_t dilation_h = 1;
+  const uint32_t dilation_w = 1;
+  const uint32_t eff_window_h = dilation_h * (window_h - 1) + 1;
+  const uint32_t eff_window_w = dilation_w * (window_w - 1) + 1;
+
+  uint32_t out_h = 0;
+  uint32_t out_w = 0;
+  switch (node->padding())
+  {
+    case luci::Padding::VALID:
+      out_h = (in_h + stride_h - eff_window_h) / stride_h;
+      out_w = (in_w + stride_w - eff_window_w) / stride_w;
+      break;
+    case luci::Padding::SAME:
+      out_h = (in_h + stride_h - 1) / stride_h;
+      out_w = (in_w + stride_w - 1) / stride_w;
+      break;
+    default:
+      LUCI_ASSERT(false, "Wrong padding type");
+  }
+
+  loco::TensorShape ofm_shape;
+  ofm_shape.rank(4);
+  ofm_shape.dim(0) = ifm_shape.dim(0);
+  ofm_shape.dim(1) = out_h;
+  ofm_shape.dim(2) = out_w;
+  ofm_shape.dim(3) = ifm_shape.dim(3);
+  return loco::NodeShape{ofm_shape};
+}
+
+loco::NodeShape infer_batch_to_space_nd(const luci::CircleBatchToSpaceND *node)
+{
+  const loco::DataType S32 = loco::DataType::S32;
+
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+  // Support only input rank is 3 and 4
+  assert(input_shape.rank() == 3 || input_shape.rank() == 4);
+
+  // Only support block_shape() with S32 type CircleConst for now
+  auto const_block_shape = loco::must_cast<luci::CircleConst *>(node->block_shape());
+  LUCI_ASSERT(const_block_shape->dtype() == loco::DataType::S32, "Only support int32 block_shape");
+
+  // Only support crops() with S32 type CircleConst for now
+  auto const_crops = loco::must_cast<luci::CircleConst *>(node->crops());
+  LUCI_ASSERT(const_crops->dtype() == loco::DataType::S32, "Only support int32 crops");
+
+  auto const_block_shape_shape = loco::shape_get(const_block_shape).as<loco::TensorShape>();
+  auto const_crops_shape = loco::shape_get(const_crops).as<loco::TensorShape>();
+  assert(const_block_shape_shape.rank() == 1);
+  assert(const_crops_shape.rank() == 2);
+
+  int32_t input_spatial_dim = input_shape.rank() - 2;
+  assert(const_block_shape_shape.dim(0) == input_spatial_dim);
+  assert(const_crops_shape.dim(0) == input_spatial_dim);
+  assert(const_crops_shape.dim(1) == 2);
+
+  loco::TensorShape shape_output;
+
+  shape_output.rank(input_shape.rank());
+
+  int32_t output_batch_size = input_shape.dim(0).value();
+  for (int32_t dim = 0; dim < input_spatial_dim; ++dim)
+  {
+    int dim_size = input_shape.dim(dim + 1).value() * const_block_shape->at<S32>(dim);
+    dim_size -= const_crops->at<S32>(dim * 2);
+    dim_size -= const_crops->at<S32>(dim * 2 + 1);
+    shape_output.dim(dim + 1) = dim_size;
+
+    assert(output_batch_size % const_block_shape->at<S32>(dim) == 0);
+    output_batch_size = output_batch_size / const_block_shape->at<S32>(dim);
+  }
+  shape_output.dim(0) = output_batch_size;
+  shape_output.dim(input_shape.rank() - 1) = input_shape.dim(input_shape.rank() - 1);
+
+  return loco::NodeShape{shape_output};
+}
+
+struct OutputSize
+{
+  uint32_t height = 0;
+  uint32_t width = 0;
+};
+
+template <class Conv2DType> OutputSize infer_conv2d_type(const Conv2DType *node)
+{
+  auto ifm_shape = loco::shape_get(node->input()).template as<loco::TensorShape>();
+  auto ker_shape = loco::shape_get(node->filter()).template as<loco::TensorShape>();
+  assert(ifm_shape.rank() == 4);
+  assert(ker_shape.rank() == 4);
+
+  uint32_t input_height = ifm_shape.dim(1).value();
+  uint32_t input_width = ifm_shape.dim(2).value();
+  uint32_t stride_height = node->stride()->h();
+  uint32_t stride_width = node->stride()->w();
+  uint32_t ker_height = ker_shape.dim(1).value();
+  uint32_t ker_width = ker_shape.dim(2).value();
+  uint32_t dilation_height = node->dilation()->h();
+  uint32_t dilation_width = node->dilation()->w();
+  uint32_t effective_ker_height = dilation_height * (ker_height - 1) + 1;
+  uint32_t effective_ker_width = dilation_width * (ker_width - 1) + 1;
+
+  uint32_t output_height = 0;
+  uint32_t output_width = 0;
+
+  if (node->padding() == luci::Padding::VALID)
+  {
+    output_height = (input_height + stride_height - effective_ker_height) / stride_height;
+    output_width = (input_width + stride_width - effective_ker_width) / stride_width;
+  }
+  else if (node->padding() == luci::Padding::SAME)
+  {
+    output_height = (input_height + stride_height - 1) / stride_height;
+    output_width = (input_width + stride_width - 1) / stride_width;
+  }
+  else
+    LUCI_ASSERT(false, "Wrong padding type");
+
+  OutputSize os{output_height, output_width};
+
+  return os;
+}
+
 // BatchMatMulV2 supports broadcasting in the batch dimensions(BatchMatMul doesn't)
 // TODO Distinguish BatchMatMul and BatchMatMulV2
 loco::NodeShape infer_batchmatmul_shape(const loco::TensorShape &x_shape,
@@ -238,13 +506,325 @@ loco::NodeShape infer_batchmatmul_shape(const loco::TensorShape &x_shape,
   return loco::NodeShape{output_shape};
 }
 
-loco::TensorShape own_shape(const luci::CircleNode *node)
+loco::NodeShape infer_concatenation(const luci::CircleConcatenation *node)
+{
+  // TODO Support when CircleConcatenation has 0 input
+  assert(node->numValues() > 0);
+
+  auto first_shape = loco::shape_get(node->values(0)).as<loco::TensorShape>();
+  auto axis = node->axis();
+  if (axis < 0)
+    axis += first_shape.rank();
+
+  assert(0 <= axis);
+  assert(first_shape.rank() > static_cast<uint32_t>(axis));
+
+  loco::TensorShape output_shape;
+
+  output_shape.rank(first_shape.rank());
+  for (uint32_t i = 0; i < output_shape.rank(); ++i)
+    output_shape.dim(i) = first_shape.dim(i);
+
+  for (uint32_t i = 1; i < node->numValues(); ++i)
+  {
+    auto input_shape = loco::shape_get(node->values(i)).as<loco::TensorShape>();
+
+    for (uint32_t j = 0; j < output_shape.rank(); ++j)
+    {
+      if (j == static_cast<uint32_t>(axis))
+        output_shape.dim(j) = output_shape.dim(j).value() + input_shape.dim(j).value();
+      else
+        assert(output_shape.dim(j) == input_shape.dim(j));
+    }
+  }
+
+  return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_conv2d(const luci::CircleConv2D *node)
+{
+  LOGGER(l);
+
+  auto ifm_shape = loco::shape_get(node->input()).as<loco::TensorShape>();  // in NHWC
+  auto ker_shape = loco::shape_get(node->filter()).as<loco::TensorShape>(); // in OHWI
+
+  INFO(l) << "[luci] CircleConv2D ShapeInf ifm(" << ifm_shape.rank() << ") ker(" << ker_shape.rank()
+          << ")" << std::endl;
+
+  assert(ifm_shape.rank() == 4);
+  assert(ker_shape.rank() == 4);
+  assert(ifm_shape.dim(3) == ker_shape.dim(3));
+
+  auto os = infer_conv2d_type(node);
+
+  loco::TensorShape ofm_shape;
+  ofm_shape.rank(4);
+  ofm_shape.dim(0) = ifm_shape.dim(0);
+  ofm_shape.dim(1) = os.height;
+  ofm_shape.dim(2) = os.width;
+  ofm_shape.dim(3) = ker_shape.dim(0);
+
+  return loco::NodeShape{ofm_shape};
+}
+
+loco::NodeShape infer_depth_to_space(const luci::CircleDepthToSpace *node)
+{
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+  LUCI_ASSERT(input_shape.rank() == 4, "Only input rank 4 is supported");
+
+  // Only data format NHWC is supported
+  // TODO need to clarify what to do with layout in this operator
+  int32_t height = input_shape.dim(1).value();
+  int32_t width = input_shape.dim(2).value();
+  int32_t depth = input_shape.dim(3).value();
+
+  int block_size = node->block_size();
+
+  if (block_size < 2)
+    INTERNAL_EXN("Block size must be >= 2");
+
+  if (depth % (block_size * block_size))
+  {
+    INTERNAL_EXN("The input tensor's depth must be divisible by block_size^2");
+  }
+
+  loco::TensorShape output_shape;
+  output_shape.rank(4);
+
+  output_shape.dim(0) = input_shape.dim(0).value();
+  output_shape.dim(1) = height * block_size;
+  output_shape.dim(2) = width * block_size;
+  output_shape.dim(3) = depth / (block_size * block_size);
+
+  return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_depthwise_conv2d(const luci::CircleDepthwiseConv2D *node)
+{
+  auto ifm_shape = loco::shape_get(node->input()).as<loco::TensorShape>();  // in NHWC
+  auto ker_shape = loco::shape_get(node->filter()).as<loco::TensorShape>(); // in 1 H W CM
+
+  assert(ifm_shape.rank() == 4);
+  assert(ker_shape.rank() == 4);
+  assert(ker_shape.dim(0).value() == 1);
+
+  auto os = infer_conv2d_type(node);
+
+  loco::TensorShape ofm_shape;
+  ofm_shape.rank(4);
+  ofm_shape.dim(0) = ifm_shape.dim(0);
+  ofm_shape.dim(1) = os.height;
+  ofm_shape.dim(2) = os.width;
+  ofm_shape.dim(3) = ker_shape.dim(3);
+
+  return loco::NodeShape{ofm_shape};
+}
+
+loco::NodeShape infer_expand_dims(const luci::CircleExpandDims *node)
+{
+  const loco::DataType S32 = loco::DataType::S32;
+  auto x_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+  if (x_shape.rank() == 0)
+  {
+    // This maybe for unknown shape. We use shape from the node itself.
+    return use_own(node);
+  }
+  auto const_axis = loco::must_cast<luci::CircleConst *>(node->axis());
+  LUCI_ASSERT(const_axis->dtype() == S32, "Only support int32 CircleConst for axis");
+  if (const_axis->rank() != 0 && const_axis->rank() != 1)
+  {
+    INTERNAL_EXN_V("Non-scalar axis in OP", node->opnum());
+  }
+  int32_t axis = const_axis->at<S32>(0);
+  LUCI_ASSERT((axis <= static_cast<int32_t>(x_shape.rank())) &&
+                  (axis >= -1 - static_cast<int32_t>(x_shape.rank())),
+              "Axis has to be between [-(D+1), D], where D is rank of input.");
+  size_t positive_axis = axis < 0 ? x_shape.rank() + axis + 1 : axis;
+  loco::TensorShape output_shape;
+  output_shape.rank(x_shape.rank() + 1);
+  size_t i = 0;
+  for (; i < positive_axis; i++)
+    output_shape.dim(i) = x_shape.dim(i);
+  output_shape.dim(i) = loco::Dimension(1);
+  for (; i < x_shape.rank(); i++)
+    output_shape.dim(i + 1) = x_shape.dim(i);
+  return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_fill(const luci::CircleFill *node)
 {
   loco::TensorShape shape;
-  shape.rank(node->rank());
-  for (uint32_t r = 0; r < node->rank(); ++r)
-    shape.dim(r) = loco::Dimension(node->dim(r).value());
-  return shape;
+  {
+    LUCI_ASSERT(node->dims(), "dims input should not be nullptr");
+
+    auto dims_node = dynamic_cast<luci::CircleConst *>(node->dims());
+    if (dims_node != nullptr)
+    {
+      // Only support node with S32
+      LUCI_ASSERT(dims_node->dtype() == loco::DataType::S32, "Only support int32 CircleConst");
+
+      if (dims_node->rank() != 1)
+        INTERNAL_EXN_V("Only support rank 1 CircleConst", oops::to_uint32(dims_node->rank()));
+
+      shape.rank(dims_node->dim(0).value());
+
+      for (uint32_t axis = 0; axis < shape.rank(); ++axis)
+      {
+        shape.dim(axis) = dims_node->at<loco::DataType::S32>(axis);
+      }
+    }
+    else
+    {
+      shape = own_shape(node);
+    }
+  }
+
+  return loco::NodeShape{shape};
+}
+
+loco::NodeShape infer_fully_connected(const luci::CircleFullyConnected *node)
+{
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+  auto weights_shape = loco::shape_get(node->weights()).as<loco::TensorShape>();
+
+  // Checking shape capability for fully connected layer
+  // Input: a tensor of at least rank 2 [D1, D2, ... Dn]
+  // Weight: [# of units, K]
+  // Output: [D1 * D2 * ... * Dn / K, # of units]
+  if (input_shape.rank() < 2 || weights_shape.rank() != 2)
+  {
+    // Return node own shape if shape inference is not possible
+    return use_own(node);
+  }
+
+  uint32_t input_size = 1;
+  for (uint32_t i = 0; i < input_shape.rank(); i++)
+  {
+    input_size = input_size * input_shape.dim(i).value();
+  }
+  const uint32_t batch_size = input_size / weights_shape.dim(1).value();
+  loco::TensorShape out_shape;
+  out_shape.rank(2);
+  out_shape.dim(0) = batch_size;
+  out_shape.dim(1) = weights_shape.dim(0);
+
+  return loco::NodeShape{out_shape};
+}
+
+loco::NodeShape infer_gather(const luci::CircleGather *node)
+{
+  loco::TensorShape output_shape;
+
+  const auto input_shape = loco::shape_get(node->params()).as<loco::TensorShape>();
+  const auto positions_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
+  int32_t axis = node->axis();
+
+  // If CircleGather input has a dynamic shape, it can't inference this shape. So, it returns the
+  // shape that node already has.
+  if (input_shape.rank() == 0 || positions_shape.rank() == 0)
+    return use_own(node);
+
+  if (axis < 0)
+    axis += input_shape.rank();
+
+  output_shape.rank(input_shape.rank() - 1 + positions_shape.rank());
+  int32_t outdim_index = 0;
+  for (int32_t i = 0; i < axis; ++i)
+    output_shape.dim(outdim_index++) = input_shape.dim(i);
+  for (uint32_t i = 0; i < positions_shape.rank(); ++i)
+    output_shape.dim(outdim_index++) = positions_shape.dim(i);
+  for (uint32_t i = axis + 1; i < input_shape.rank(); ++i)
+    output_shape.dim(outdim_index++) = input_shape.dim(i);
+
+  return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_gather_nd(const luci::CircleGatherNd *node)
+{
+  loco::TensorShape output_shape;
+
+  const auto params_shape = loco::shape_get(node->params()).as<loco::TensorShape>();
+  const auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
+
+  const auto params_rank = params_shape.rank();
+  const auto indices_rank = indices_shape.rank();
+
+  // see https://www.tensorflow.org/api_docs/python/tf/gather_nd
+  // output.shape = indices.shape[:-1] + params.shape[indices.shape[-1]:]
+  // batch_dims isn't supported in tflite
+
+  // TODO: replace exceptions with setting shape to unknown?
+
+  if (!indices_shape.dim(indices_rank - 1).known())
+    INTERNAL_EXN("Last indices dimension is unknown");
+
+  auto indices_last_dim = indices_shape.dim(indices_rank - 1).value();
+
+  if (indices_last_dim > params_rank)
+    INTERNAL_EXN("Last indices dimension should be <= params rank");
+
+  const uint32_t output_rank = indices_rank + params_rank - indices_last_dim - 1;
+
+  output_shape.rank(output_rank);
+
+  uint32_t output_index = 0;
+  for (uint32_t i = 0; i < indices_rank - 1; ++i)
+  {
+    auto &dim = indices_shape.dim(i);
+    if (!dim.known())
+      INTERNAL_EXN("Unknown indices dimension is unsupported");
+    output_shape.dim(output_index++).set(dim.value());
+  }
+
+  for (uint32_t i = indices_last_dim; i < params_rank; ++i)
+  {
+    auto &dim = params_shape.dim(i);
+    if (!dim.known())
+      INTERNAL_EXN("Unknown params dimension is unsupported");
+    output_shape.dim(output_index++).set(dim.value());
+  }
+
+  return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_matrix_diag(const luci::CircleMatrixDiag *node)
+{
+  loco::TensorShape output_shape;
+
+  auto diagonal_shape = loco::shape_get(node->diagonal()).as<loco::TensorShape>();
+  auto rank = diagonal_shape.rank();
+
+  output_shape.rank(rank + 1);
+
+  for (uint32_t i = 0; i < rank; i++)
+  {
+    output_shape.dim(i) = diagonal_shape.dim(i);
+  }
+
+  output_shape.dim(rank) = diagonal_shape.dim(rank - 1);
+
+  return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_matrix_set_diag(const luci::CircleMatrixSetDiag *node)
+{
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+  auto diagonal_shape = loco::shape_get(node->diagonal()).as<loco::TensorShape>();
+
+  auto rank = diagonal_shape.rank();
+
+  LUCI_ASSERT(rank == input_shape.rank() - 1, "diagonal rank = input rank - 1");
+
+  for (uint32_t i = 0; i < rank - 1; i++)
+  {
+    LUCI_ASSERT(diagonal_shape.dim(i) == input_shape.dim(i), "diagonal dims = input dims");
+  }
+
+  auto dim = std::min(input_shape.dim(rank - 1).value(), input_shape.dim(rank).value());
+
+  LUCI_ASSERT(dim == diagonal_shape.dim(rank - 1), "Max diag len error");
+
+  return loco::NodeShape{input_shape};
 }
 
 loco::TensorShape infer_reducer(const loco::Node *input, const loco::Node *indices, bool keep_dims)
@@ -302,885 +882,1311 @@ loco::TensorShape infer_reducer(const loco::Node *input, const loco::Node *indic
   return output_shape;
 }
 
-/**
- * @brief vector_from_constant will return int64_t vector from CircleConst node
- */
-template <loco::DataType T> std::vector<int64_t> vector_from_constant(luci::CircleConst *const_node)
+loco::NodeShape infer_mirror_pad(const luci::CircleMirrorPad *node)
 {
-  std::vector<int64_t> result;
-
-  for (uint32_t idx = 0; idx < const_node->size<T>(); ++idx)
-    result.push_back(const_node->at<T>(idx));
+  // TODO support non-const case
+  auto paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
+  return use_paddings(node, paddings);
+}
 
-  return result;
+loco::NodeShape infer_one_hot(const luci::CircleOneHot *node)
+{
+  const loco::DataType S32 = loco::DataType::S32;
+  auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
+  // Only support OneHot node's depth() is CircleConst with type S32
+  // TODO support depth with other types
+  auto depth = loco::must_cast<luci::CircleConst *>(node->depth());
+  LUCI_ASSERT(depth->dtype() == S32, "Only support int32 CircleConst");
+  if (depth->rank() != 0)
+    INTERNAL_EXN_V("Only support rank 0 CircleOneHot in Depth", oops::to_uint32(depth->rank()));
+  loco::TensorShape output_shape;
+  output_shape.rank(indices_shape.rank() + 1);
+  auto axis = node->axis();
+  if (axis < 0)
+    axis += indices_shape.rank() + 1;
+  LUCI_ASSERT(0 <= axis, "Axis is out of range");
+  LUCI_ASSERT(static_cast<uint32_t>(axis) <= indices_shape.rank(), "Axis is out of range");
+  uint32_t j = 0;
+  for (uint32_t i = 0; i < output_shape.rank(); i++)
+  {
+    if (i == static_cast<uint32_t>(axis))
+    {
+      output_shape.dim(i) = depth->at<S32>(0);
+    }
+    else
+    {
+      output_shape.dim(i) = indices_shape.dim(j++);
+    }
+  }
+  return loco::NodeShape{output_shape};
 }
 
-template <class CIRCLENODE> loco::NodeShape broadcast_xy(const CIRCLENODE *node)
+loco::NodeShape infer_pack(const luci::CirclePack *node)
 {
-  auto x_shape = loco::shape_get(node->x()).template as<loco::TensorShape>();
-  auto y_shape = loco::shape_get(node->y()).template as<loco::TensorShape>();
+  LUCI_ASSERT(node->values_count() > 0, "Only support one or more inputs");
 
-  auto output_shape = broadcast_shape(x_shape, y_shape);
+  auto first_shape = loco::shape_get(node->values(0)).as<loco::TensorShape>();
+  // Make sure all inputs have the same shape.
+  for (uint32_t i = 1; i < node->values_count(); ++i)
+  {
+    auto in_shape = loco::shape_get(node->values(i)).as<loco::TensorShape>();
+    LUCI_ASSERT(loco::NodeShape{first_shape} == loco::NodeShape{in_shape},
+                "All inputs must have the same shape");
+  }
+
+  // Checking shape capability for pack layer
+  // Input: tensors [D1, D2, ... Dn]
+  // Axis: K
+  // Output: [D1, D2, ... , D_K-1, n, D_K+1, ... Dn]
+  auto axis = node->axis();
+  if (axis < 0)
+    axis += first_shape.rank() + 1;
+
+  LUCI_ASSERT(0 <= axis, "Axis is out of range");
+  LUCI_ASSERT(static_cast<uint32_t>(axis) <= first_shape.rank(), "Axis is out of range");
+
+  loco::TensorShape output_shape;
+  output_shape.rank(first_shape.rank() + 1);
+
+  uint32_t j = 0;
+  for (uint32_t i = 0; i < output_shape.rank(); ++i)
+  {
+    if (i == static_cast<uint32_t>(axis))
+    {
+      output_shape.dim(i) = node->values_count();
+    }
+    else
+    {
+      output_shape.dim(i) = first_shape.dim(j++);
+    }
+  }
 
   return loco::NodeShape{output_shape};
 }
 
-template <class CIRCLENODE> loco::NodeShape use_x(const CIRCLENODE *node)
+loco::NodeShape infer_pad(const luci::CirclePad *node)
 {
-  auto x_shape = loco::shape_get(node->x()).template as<loco::TensorShape>();
-  return loco::NodeShape{x_shape};
+  // TODO support non-const case
+  auto paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
+  return use_paddings(node, paddings);
 }
 
-template <class CIRCLENODE> loco::NodeShape use_logits(const CIRCLENODE *node)
+loco::NodeShape infer_pad_v2(const luci::CirclePadV2 *node)
 {
-  auto shape = loco::shape_get(node->logits()).template as<loco::TensorShape>();
-  return loco::NodeShape{shape};
+  // TODO support non-const case
+  auto paddings = dynamic_cast<luci::CircleConst *>(node->paddings());
+  if (!paddings)
+  {
+    auto node_shape = own_shape(node);
+    return loco::NodeShape{node_shape};
+  }
+  return use_paddings(node, paddings);
 }
 
-loco::NodeShape use_own(const luci::CircleNode *node)
+loco::NodeShape infer_p_relu(const luci::CirclePRelu *node)
 {
-  loco::TensorShape shape = own_shape(node);
-  return loco::NodeShape{shape};
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+  auto alpha_shape = loco::shape_get(node->alpha()).as<loco::TensorShape>();
+
+  auto output_shape = broadcast_shape(input_shape, alpha_shape);
+
+  return loco::NodeShape{output_shape};
 }
 
-/**
- * @brief Class to infer the shape of CircleNode
- *
- * @note All CircleNode's inputs and outputs are always loco::Domain::Tensor
- */
-class ShapeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::NodeShape>
+loco::NodeShape infer_range(const luci::CircleRange *node)
 {
-public:
-  loco::NodeShape visit(const luci::CircleAbs *node) final { return use_x(node); }
+  loco::TensorShape output_shape;
+  output_shape.rank(1);
 
-  loco::NodeShape visit(const luci::CircleAdd *node) final { return broadcast_xy(node); }
+  auto start_node = dynamic_cast<luci::CircleConst *>(node->start());
+  auto limit_node = dynamic_cast<luci::CircleConst *>(node->limit());
+  auto delta_node = dynamic_cast<luci::CircleConst *>(node->delta());
 
-  loco::NodeShape visit(const luci::CircleAddN *node) final
+  if (start_node == nullptr || limit_node == nullptr || delta_node == nullptr)
   {
-    auto shape = loco::shape_get(node->inputs(0)).as<loco::TensorShape>();
+    return use_own(node);
+  }
 
-    for (uint32_t idx = 1; idx < node->arity(); ++idx)
-    {
-      auto shape_idx = loco::shape_get(node->inputs(idx)).as<loco::TensorShape>();
-      if (!(shape == shape_idx))
-      {
-        INTERNAL_EXN_V("ADD_N shape not same as the first input: ", idx);
-      }
-    }
+  double start = 0, limit = 0, delta = 0;
+
+#define GET_RANGE_PARAM(DT)         \
+  start = start_node->scalar<DT>(); \
+  limit = limit_node->scalar<DT>(); \
+  delta = delta_node->scalar<DT>();
 
-    return loco::NodeShape{shape};
+  switch (start_node->dtype())
+  {
+    case loco::DataType::FLOAT32:
+      GET_RANGE_PARAM(loco::DataType::FLOAT32)
+      break;
+    case loco::DataType::S32:
+      GET_RANGE_PARAM(loco::DataType::S32)
+      break;
+    default:
+      INTERNAL_EXN("Range data type not supported");
   }
 
-  loco::NodeShape visit(const luci::CircleArgMax *node) final
+#undef GET_RANGE_PARAM
+
+  if (delta == 0)
+    INTERNAL_EXN("Delta can not be zero");
+
+  output_shape.dim(0) = ceil((limit - start) / delta);
+
+  return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_reshape(const luci::CircleReshape *node)
+{
+  LOGGER(l);
+
+  const loco::DataType S32 = loco::DataType::S32;
+
+  loco::TensorShape shape_by_input;
   {
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-    auto dimension_shape = loco::shape_get(node->dimension()).as<loco::TensorShape>();
+    LUCI_ASSERT(node->shape(), "2nd input shape() should not be nullptr");
 
-    int64_t select_axis = 0;
+    // Only support node's shape() is CircleConst with S32
+    // TODO support other node with other types
+    auto const_shape_node = dynamic_cast<luci::CircleConst *>(node->shape());
+    if (const_shape_node != nullptr)
     {
-      LUCI_ASSERT(node->dimension(), "2nd input dimension() should not be nullptr");
-
-      // Only support node's shape() is CircleConst with S32/S64
-      // Support S32 for now.
-      auto const_shape_node = loco::must_cast<luci::CircleConst *>(node->dimension());
-      LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32,
-                  "Only support int32 CircleConst for CircleArgMax");
+      LUCI_ASSERT(const_shape_node->dtype() == S32, "Only support int32 CircleConst");
 
-      if (const_shape_node->rank() > 1)
-        INTERNAL_EXN_V("Only support rank 0/1 CircleConst",
-                       oops::to_uint32(const_shape_node->rank()));
+      shape_by_input.rank(const_shape_node->size<S32>());
 
-      select_axis = const_shape_node->scalar<loco::DataType::S32>();
+      for (uint32_t axis = 0; axis < shape_by_input.rank(); ++axis)
+      {
+        shape_by_input.dim(axis) = const_shape_node->at<S32>(axis);
+      }
     }
-    assert(select_axis < input_shape.rank());
-    assert(select_axis >= 0); // TODO support minus of this breaks
-
-    // NOTE select_axis is removed
-    loco::TensorShape shape_output;
-    uint32_t rank = input_shape.rank();
-    uint32_t shrink = static_cast<uint32_t>(select_axis);
-    assert(rank > 0);
-    shape_output.rank(rank - 1);
-    for (uint32_t r = 0, d = 0; r < rank; ++r)
+    else
     {
-      if (r == shrink)
-        continue;
-      shape_output.dim(d++) = input_shape.dim(r);
+      // We use shape from the node itself
+      shape_by_input = own_shape(node);
     }
-    return loco::NodeShape{shape_output};
   }
 
-  loco::NodeShape visit(const luci::CircleArgMin *node) final
+  loco::TensorShape shape_by_attr;
   {
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-    auto dimension_shape = loco::shape_get(node->dimension()).as<loco::TensorShape>();
+    shape_by_attr.rank(node->newShape()->rank());
 
-    int64_t select_axis = 0;
+    for (uint32_t axis = 0; axis < shape_by_attr.rank(); ++axis)
     {
-      LUCI_ASSERT(node->dimension(), "2nd input dimension() should not be nullptr");
+      shape_by_attr.dim(axis) = node->newShape()->dim(axis);
+    }
+  }
 
-      // Only support node's shape() is CircleConst with S32/S64
-      // Support S32 for now.
-      auto const_shape_node = loco::must_cast<luci::CircleConst *>(node->dimension());
-      LUCI_ASSERT(const_shape_node->dtype() == loco::DataType::S32,
-                  "Only support int32 CircleConst for CircleArgMin");
+  if (!(shape_by_input == shape_by_attr))
+  {
+    INFO(l) << "CircleReshape: Two new shape information mismatched : " << std::endl;
+    INFO(l) << "   shape_by_input : " << shape_by_input << std::endl;
+    INFO(l) << "   shape_by_attr : " << shape_by_attr << std::endl;
+  }
 
-      if (const_shape_node->rank() > 1)
-        INTERNAL_EXN_V("Only support rank 0/1 CircleConst",
-                       oops::to_uint32(const_shape_node->rank()));
+  loco::TensorShape output_shape = shape_by_input;
 
-      select_axis = const_shape_node->scalar<loco::DataType::S32>();
+  // One of the dimensions can have special value -1, meaning its actual value should be inferred.
+  const auto input_shape = loco::shape_get(node->tensor()).as<loco::TensorShape>();
+  const uint32_t input_element_count = loco::element_count(&input_shape);
+  uint32_t output_element_count = 1;
+  uint32_t unknown_dim_index = UINT32_MAX;
+  for (uint32_t dim_index = 0; dim_index < output_shape.rank(); ++dim_index)
+  {
+    const uint32_t dim_value = output_shape.dim(dim_index).value();
+    if (static_cast<int>(dim_value) == -1)
+    {
+      LUCI_ASSERT(unknown_dim_index == UINT32_MAX, "More than one unknown dimension");
+      unknown_dim_index = dim_index;
     }
-    assert(select_axis < input_shape.rank());
-    assert(select_axis >= 0); // TODO support minus of this breaks
-
-    // NOTE select_axis is removed
-    loco::TensorShape shape_output;
-    uint32_t rank = input_shape.rank();
-    uint32_t shrink = static_cast<uint32_t>(select_axis);
-    assert(rank > 0);
-    shape_output.rank(rank - 1);
-    for (uint32_t r = 0, d = 0; r < rank; ++r)
+    else
     {
-      if (r == shrink)
-        continue;
-      shape_output.dim(d++) = input_shape.dim(r);
+      output_element_count *= dim_value;
     }
-    return loco::NodeShape{shape_output};
   }
-
-  loco::NodeShape visit(const luci::CircleAveragePool2D *node) final
+  if (unknown_dim_index != UINT32_MAX)
   {
-    return infer_pool_2d_shape(node);
+    output_shape.dim(unknown_dim_index) = input_element_count / output_element_count;
   }
 
-  loco::NodeShape visit(const luci::CircleBatchMatMul *node) final
-  {
-    auto x_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
-    auto y_shape = loco::shape_get(node->y()).as<loco::TensorShape>();
+  return loco::NodeShape{output_shape};
+}
 
-    return infer_batchmatmul_shape(x_shape, y_shape, node->adj_x(), node->adj_y());
-  }
+loco::NodeShape infer_resize_bilinear(const luci::CircleResizeBilinear *node)
+{
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
 
-  loco::NodeShape visit(const luci::CircleBatchToSpaceND *node) final
-  {
-    const loco::DataType S32 = loco::DataType::S32;
+  if (input_shape.rank() != 4)
+    INTERNAL_EXN("Expected ResizeBilinear input to have rank 4");
 
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-    // Support only input rank is 3 and 4
-    assert(input_shape.rank() == 3 || input_shape.rank() == 4);
+  auto *const_node = loco::must_cast<luci::CircleConst *>(node->size());
 
-    // Only support block_shape() with S32 type CircleConst for now
-    auto const_block_shape = loco::must_cast<luci::CircleConst *>(node->block_shape());
-    LUCI_ASSERT(const_block_shape->dtype() == loco::DataType::S32,
-                "Only support int32 block_shape");
+  if (const_node->dtype() != loco::DataType::S32)
+    INTERNAL_EXN("Only S32 datatype is supported for ResizeBilinear size");
 
-    // Only support crops() with S32 type CircleConst for now
-    auto const_crops = loco::must_cast<luci::CircleConst *>(node->crops());
-    LUCI_ASSERT(const_crops->dtype() == loco::DataType::S32, "Only support int32 crops");
+  if (const_node->rank() != 1)
+    INTERNAL_EXN("Expected size tensor of rank 1");
 
-    auto const_block_shape_shape = loco::shape_get(const_block_shape).as<loco::TensorShape>();
-    auto const_crops_shape = loco::shape_get(const_crops).as<loco::TensorShape>();
-    assert(const_block_shape_shape.rank() == 1);
-    assert(const_crops_shape.rank() == 2);
+  if (const_node->dim(0).value() != 2)
+    INTERNAL_EXN("Expected size tensor with shape [2]");
 
-    int32_t input_spatial_dim = input_shape.rank() - 2;
-    assert(const_block_shape_shape.dim(0) == input_spatial_dim);
-    assert(const_crops_shape.dim(0) == input_spatial_dim);
-    assert(const_crops_shape.dim(1) == 2);
+  loco::TensorShape output_shape;
+  output_shape.rank(4);
+  output_shape.dim(0) = input_shape.dim(0);
+  output_shape.dim(1) = const_node->at<loco::DataType::S32>(0);
+  output_shape.dim(2) = const_node->at<loco::DataType::S32>(1);
+  output_shape.dim(3) = input_shape.dim(3);
 
-    loco::TensorShape shape_output;
+  return loco::NodeShape{output_shape};
+}
 
-    shape_output.rank(input_shape.rank());
+loco::NodeShape infer_resize_nearest_neighbor(const luci::CircleResizeNearestNeighbor *node)
+{
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
 
-    int32_t output_batch_size = input_shape.dim(0).value();
-    for (int32_t dim = 0; dim < input_spatial_dim; ++dim)
-    {
-      int dim_size = input_shape.dim(dim + 1).value() * const_block_shape->at<S32>(dim);
-      dim_size -= const_crops->at<S32>(dim * 2);
-      dim_size -= const_crops->at<S32>(dim * 2 + 1);
-      shape_output.dim(dim + 1) = dim_size;
+  if (input_shape.rank() != 4)
+    INTERNAL_EXN("Expected ResizeNearesNeighbor input to have rank 4");
 
-      assert(output_batch_size % const_block_shape->at<S32>(dim) == 0);
-      output_batch_size = output_batch_size / const_block_shape->at<S32>(dim);
-    }
-    shape_output.dim(0) = output_batch_size;
-    shape_output.dim(input_shape.rank() - 1) = input_shape.dim(input_shape.rank() - 1);
+  auto *const_node = loco::must_cast<luci::CircleConst *>(node->size());
 
-    return loco::NodeShape{shape_output};
-  }
+  if (const_node->dtype() != loco::DataType::S32)
+    INTERNAL_EXN("Only S32 datatype is supported for ResizeNearesNeighbor size");
 
-  loco::NodeShape visit(const luci::CircleCast *node) final { return use_x(node); }
+  if (const_node->rank() != 1)
+    INTERNAL_EXN("Expected size tensor of rank 1");
 
-  loco::NodeShape visit(const luci::CircleCeil *node) final { return use_x(node); }
+  if (const_node->dim(0).value() != 2)
+    INTERNAL_EXN("Expected size tensor with shape [2]");
 
-  loco::NodeShape visit(const luci::CircleConcatenation *node) final
-  {
-    // TODO Support when CircleConcatenation has 0 input
-    assert(node->numValues() > 0);
+  loco::TensorShape output_shape;
+  output_shape.rank(4);
+  output_shape.dim(0) = input_shape.dim(0);
+  output_shape.dim(1) = const_node->at<loco::DataType::S32>(0);
+  output_shape.dim(2) = const_node->at<loco::DataType::S32>(1);
+  output_shape.dim(3) = input_shape.dim(3);
 
-    auto first_shape = loco::shape_get(node->values(0)).as<loco::TensorShape>();
-    auto axis = node->axis();
-    if (axis < 0)
-      axis += first_shape.rank();
+  return loco::NodeShape{output_shape};
+}
 
-    assert(0 <= axis);
-    assert(first_shape.rank() > static_cast<uint32_t>(axis));
+loco::NodeShape infer_scatter_nd(const luci::CircleScatterNd *node)
+{
+  loco::TensorShape output_shape;
 
-    loco::TensorShape output_shape;
+  auto shape_node = loco::must_cast<luci::CircleConst *>(node->shape());
 
-    output_shape.rank(first_shape.rank());
-    for (uint32_t i = 0; i < output_shape.rank(); ++i)
-      output_shape.dim(i) = first_shape.dim(i);
+  const loco::DataType S32 = loco::DataType::S32;
+  const loco::DataType S64 = loco::DataType::S64;
 
-    for (uint32_t i = 1; i < node->numValues(); ++i)
-    {
-      auto input_shape = loco::shape_get(node->values(i)).as<loco::TensorShape>();
+  std::vector<int64_t> vect_shape;
 
-      for (uint32_t j = 0; j < output_shape.rank(); ++j)
-      {
-        if (j == static_cast<uint32_t>(axis))
-          output_shape.dim(j) = output_shape.dim(j).value() + input_shape.dim(j).value();
-        else
-          assert(output_shape.dim(j) == input_shape.dim(j));
-      }
-    }
+  if (shape_node->dtype() == S32)
+    vect_shape = vector_from_constant<S32>(shape_node);
+  else if (shape_node->dtype() == S64)
+    vect_shape = vector_from_constant<S64>(shape_node);
+  else
+    LUCI_ASSERT(false, "Only support int32/int64 for shape()");
 
-    return loco::NodeShape{output_shape};
-  }
+  output_shape.rank(vect_shape.size());
+  for (uint32_t i = 0; i < vect_shape.size(); ++i)
+    output_shape.dim(i) = vect_shape[i];
 
-  loco::NodeShape visit(const luci::CircleConst *node) final { return use_own(node); }
+  return loco::NodeShape{output_shape};
+}
 
-  loco::NodeShape visit(const luci::CircleConv2D *node) final
-  {
-    LOGGER(l);
+loco::NodeShape infer_segment_sum(const luci::CircleSegmentSum *node)
+{
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+  auto segment_shape = loco::shape_get(node->segment_ids()).as<loco::TensorShape>();
 
-    auto ifm_shape = loco::shape_get(node->input()).as<loco::TensorShape>();  // in NHWC
-    auto ker_shape = loco::shape_get(node->filter()).as<loco::TensorShape>(); // in OHWI
+  LUCI_ASSERT(segment_shape.rank() == 1, "segment_ids must be 1-D tensor");
+  LUCI_ASSERT(segment_shape.dim(0).value() == input_shape.dim(0).value(),
+              "segment_ids size must be equal to the size of data's first dimension");
 
-    INFO(l) << "[luci] CircleConv2D ShapeInf ifm(" << ifm_shape.rank() << ") ker("
-            << ker_shape.rank() << ")" << std::endl;
+  auto ids_shape_value = loco::must_cast<luci::CircleConst *>(node->segment_ids());
 
-    assert(ifm_shape.rank() == 4);
-    assert(ker_shape.rank() == 4);
-    assert(ifm_shape.dim(3) == ker_shape.dim(3));
+  std::vector<int64_t> vect_ids;
 
-    uint32_t input_height = ifm_shape.dim(1).value();
-    uint32_t input_width = ifm_shape.dim(2).value();
-    uint32_t stride_height = node->stride()->h();
-    uint32_t stride_width = node->stride()->w();
-    uint32_t ker_height = ker_shape.dim(1).value();
-    uint32_t ker_width = ker_shape.dim(2).value();
-    uint32_t dilation_height = node->dilation()->h();
-    uint32_t dilation_width = node->dilation()->w();
-    uint32_t effective_ker_height = dilation_height * (ker_height - 1) + 1;
-    uint32_t effective_ker_width = dilation_width * (ker_width - 1) + 1;
+  if (ids_shape_value->dtype() == loco::DataType::S32)
+    vect_ids = vector_from_constant<loco::DataType::S32>(ids_shape_value);
 
-    uint32_t output_height = 0;
-    uint32_t output_width = 0;
+  LUCI_ASSERT(std::is_sorted(vect_ids.begin(), vect_ids.end()),
+              "segment_ids values should be sorted")
 
-    if (node->padding() == luci::Padding::VALID)
-    {
-      output_height = (input_height + stride_height - effective_ker_height) / stride_height;
-      output_width = (input_width + stride_width - effective_ker_width) / stride_width;
-    }
-    else if (node->padding() == luci::Padding::SAME)
+  loco::TensorShape output_shape;
+
+  output_shape.rank(input_shape.rank());
+
+  for (uint32_t i = 1; i < input_shape.rank(); ++i)
+    output_shape.dim(i) = input_shape.dim(i);
+
+  output_shape.dim(0) = vect_ids.back() + 1;
+
+  return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_select(const luci::CircleSelect *node)
+{
+  auto t_shape = loco::shape_get(node->t()).as<loco::TensorShape>();
+  assert(t_shape == loco::shape_get(node->e()).as<loco::TensorShape>());
+
+  // condition shape validation
+  auto c_shape = loco::shape_get(node->condition()).as<loco::TensorShape>();
+  if (c_shape.rank() != t_shape.rank())
+  {
+    if (c_shape.rank() != 0 && c_shape.rank() != 1)
+      INTERNAL_EXN_V("CircleSelect condition rank is not 0 nor 1: ", c_shape.rank());
+
+    if (c_shape.rank() == 1)
     {
-      output_height = (input_height + stride_height - 1) / stride_height;
-      output_width = (input_width + stride_width - 1) / stride_width;
+      if (c_shape.dim(0).value() != t_shape.dim(0).value())
+        INTERNAL_EXN("CircleSelect condition dim(0) should match with t.dim(0)");
     }
-    else
-      LUCI_ASSERT(false, "Wrong padding type");
+  }
 
-    loco::TensorShape ofm_shape;
-    ofm_shape.rank(4);
-    ofm_shape.dim(0) = ifm_shape.dim(0);
-    ofm_shape.dim(1) = output_height;
-    ofm_shape.dim(2) = output_width;
-    ofm_shape.dim(3) = ker_shape.dim(0);
+  return loco::NodeShape{t_shape};
+}
 
-    return loco::NodeShape{ofm_shape};
-  }
+loco::NodeShape infer_select_v2(const luci::CircleSelectV2 *node)
+{
+  auto c_shape = loco::shape_get(node->condition()).as<loco::TensorShape>();
+  auto t_shape = loco::shape_get(node->t()).as<loco::TensorShape>();
+  auto e_shape = loco::shape_get(node->e()).as<loco::TensorShape>();
 
-  loco::NodeShape visit(const luci::CircleCos *node) final { return use_x(node); }
+  // validate ability to broadcast shapes to each other
+  auto b_shape = broadcast_shape(broadcast_shape(c_shape, t_shape), e_shape);
+  return loco::NodeShape{b_shape};
+}
 
-  loco::NodeShape visit(const luci::CircleCustom *node) final { return use_own(node); }
+loco::NodeShape infer_shape(const luci::CircleShape *node)
+{
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
 
-  loco::NodeShape visit(const luci::CircleDepthToSpace *node) final
-  {
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-    LUCI_ASSERT(input_shape.rank() == 4, "Only input rank 4 is supported");
+  loco::TensorShape output_shape;
 
-    // Only data format NHWC is supported
-    // TODO need to clarify what to do with layout in this operator
-    int32_t height = input_shape.dim(1).value();
-    int32_t width = input_shape.dim(2).value();
-    int32_t depth = input_shape.dim(3).value();
+  output_shape.rank(1);
+  output_shape.dim(0) = input_shape.rank();
 
-    int block_size = node->block_size();
+  return loco::NodeShape{output_shape};
+}
 
-    if (block_size < 2)
-      INTERNAL_EXN("Block size must be >= 2");
+loco::NodeShape infer_slice(const luci::CircleSlice *node)
+{
+  const loco::DataType S32 = loco::DataType::S32;
+  const loco::DataType S64 = loco::DataType::S64;
+
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
 
-    if (depth % (block_size * block_size))
+  auto const_begin = loco::must_cast<luci::CircleConst *>(node->begin());
+  auto const_size = loco::must_cast<luci::CircleConst *>(node->size());
+
+  loco::TensorShape output_shape;
+  std::vector<int64_t> vect_begin; // to hold both S32/S64, we use int64_t
+  std::vector<int64_t> vect_size;
+
+  if (const_begin->dtype() == S32)
+    vect_begin = vector_from_constant<S32>(const_begin);
+  else if (const_begin->dtype() == S64)
+    vect_begin = vector_from_constant<S64>(const_begin);
+  else
+    LUCI_ASSERT(false, "Only support int32/int64 for begin()");
+
+  if (const_size->dtype() == S32)
+    vect_size = vector_from_constant<S32>(const_size);
+  else if (const_size->dtype() == S64)
+    vect_size = vector_from_constant<S64>(const_size);
+  else
+    LUCI_ASSERT(false, "Only support int32/int64 for size()");
+
+  assert(input_shape.rank() == vect_begin.size());
+  assert(input_shape.rank() == vect_size.size());
+
+  output_shape.rank(vect_begin.size());
+  for (uint32_t idx = 0; idx < vect_begin.size(); ++idx)
+  {
+    auto size = vect_size.at(idx);
+    if (size == -1)
     {
-      INTERNAL_EXN("The input tensor's depth must be divisible by block_size^2");
+      size = input_shape.dim(idx).value() - vect_begin.at(idx);
     }
+    output_shape.dim(idx) = size;
+  }
 
-    loco::TensorShape output_shape;
-    output_shape.rank(4);
+  return loco::NodeShape{output_shape};
+}
 
-    output_shape.dim(0) = input_shape.dim(0).value();
-    output_shape.dim(1) = height * block_size;
-    output_shape.dim(2) = width * block_size;
-    output_shape.dim(3) = depth / (block_size * block_size);
+loco::NodeShape infer_space_to_batch_nd(const luci::CircleSpaceToBatchND *node)
+{
+  const loco::DataType S32 = loco::DataType::S32;
 
-    return loco::NodeShape{output_shape};
-  }
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+  // Support only input rank is 3 and 4
+  assert(input_shape.rank() == 3 || input_shape.rank() == 4);
 
-  loco::NodeShape visit(const luci::CircleDepthwiseConv2D *node) final
-  {
-    auto ifm_shape = loco::shape_get(node->input()).as<loco::TensorShape>();  // in NHWC
-    auto ker_shape = loco::shape_get(node->filter()).as<loco::TensorShape>(); // in 1 H W CM
+  // Only support block_shape() with S32 type CircleConst for now
+  auto const_block_shape = loco::must_cast<luci::CircleConst *>(node->block_shape());
+  LUCI_ASSERT(const_block_shape->dtype() == S32, "Only support int32 block_shape");
 
-    assert(ifm_shape.rank() == 4);
-    assert(ker_shape.rank() == 4);
-    assert(ker_shape.dim(0).value() == 1);
+  // Only support paddings() with S32 type CircleConst for now
+  auto const_paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
+  LUCI_ASSERT(const_paddings->dtype() == S32, "Only support int32 paddings");
 
-    uint32_t input_height = ifm_shape.dim(1).value();
-    uint32_t input_width = ifm_shape.dim(2).value();
-    uint32_t stride_height = node->stride()->h();
-    uint32_t stride_width = node->stride()->w();
-    uint32_t ker_height = ker_shape.dim(1).value();
-    uint32_t ker_width = ker_shape.dim(2).value();
-    uint32_t dilation_height = node->dilation()->h();
-    uint32_t dilation_width = node->dilation()->w();
-    uint32_t effective_ker_height = dilation_height * (ker_height - 1) + 1;
-    uint32_t effective_ker_width = dilation_width * (ker_width - 1) + 1;
+  auto const_block_shape_shape = loco::shape_get(const_block_shape).as<loco::TensorShape>();
+  auto const_paddings_shape = loco::shape_get(const_paddings).as<loco::TensorShape>();
+  assert(const_block_shape_shape.rank() == 1);
+  assert(const_paddings_shape.rank() == 2);
 
-    uint32_t output_height = 0;
-    uint32_t output_width = 0;
+  int32_t input_spatial_dim = input_shape.rank() - 2;
+  assert(const_block_shape_shape.dim(0) == input_spatial_dim);
+  assert(const_paddings_shape.dim(0) == input_spatial_dim);
+  assert(const_paddings_shape.dim(1) == 2);
 
-    if (node->padding() == luci::Padding::VALID)
-    {
-      output_height = (input_height + stride_height - effective_ker_height) / stride_height;
-      output_width = (input_width + stride_width - effective_ker_width) / stride_width;
-    }
-    else if (node->padding() == luci::Padding::SAME)
+  // Check all values of block_shape >= 1
+  uint32_t ele_count = const_block_shape->size<S32>();
+  for (uint32_t e = 0; e < ele_count; ++e)
+  {
+    auto val = const_block_shape->at<S32>(e);
+    if (val < 1)
     {
-      output_height = (input_height + stride_height - 1) / stride_height;
-      output_width = (input_width + stride_width - 1) / stride_width;
+      INTERNAL_EXN_V("All values of block_shape >= 1: ", e);
     }
-    else
-      LUCI_ASSERT(false, "Wrong padding type");
+  }
 
-    loco::TensorShape ofm_shape;
-    ofm_shape.rank(4);
-    ofm_shape.dim(0) = ifm_shape.dim(0);
-    ofm_shape.dim(1) = output_height;
-    ofm_shape.dim(2) = output_width;
-    ofm_shape.dim(3) = ker_shape.dim(3);
+  loco::TensorShape shape_output;
 
-    return loco::NodeShape{ofm_shape};
+  shape_output.rank(input_shape.rank());
+
+  int32_t output_batch_size = input_shape.dim(0).value();
+  for (int32_t dim = 0; dim < input_spatial_dim; ++dim)
+  {
+    int dim_size = input_shape.dim(dim + 1).value();
+    dim_size += const_paddings->at<S32>(dim * 2);
+    dim_size += const_paddings->at<S32>(dim * 2 + 1);
+    shape_output.dim(dim + 1) = dim_size / const_block_shape->at<S32>(dim);
+
+    assert(dim_size % const_block_shape->at<S32>(dim) == 0);
+    output_batch_size = output_batch_size * const_block_shape->at<S32>(dim);
   }
+  shape_output.dim(0) = output_batch_size;
+  shape_output.dim(input_shape.rank() - 1) = input_shape.dim(input_shape.rank() - 1);
 
-  loco::NodeShape visit(const luci::CircleDiv *node) final { return broadcast_xy(node); }
+  return loco::NodeShape{shape_output};
+}
 
-  loco::NodeShape visit(const luci::CircleElu *node) final
-  {
-    auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
+loco::NodeShape infer_space_to_depth(const luci::CircleSpaceToDepth *node)
+{
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+  LUCI_ASSERT(input_shape.rank() == 4, "Only input rank 4 is supported");
 
-    return loco::NodeShape{input_shape};
+  // Only data format NHWC is supported
+  int32_t height = input_shape.dim(1).value();
+  int32_t width = input_shape.dim(2).value();
+  int32_t depth = input_shape.dim(3).value();
+
+  int block_size = node->block_size();
+
+  if (block_size < 2)
+    INTERNAL_EXN("Block size must be >= 2");
+
+  if ((height % block_size) || (width % block_size))
+  {
+    INTERNAL_EXN("The input tensor's height and width must be divisible by block_size");
   }
 
-  loco::NodeShape visit(const luci::CircleEqual *node) final { return broadcast_xy(node); }
+  loco::TensorShape output_shape;
+  output_shape.rank(4);
 
-  loco::NodeShape visit(const luci::CircleExp *node) final { return use_x(node); }
+  output_shape.dim(0) = input_shape.dim(0).value();
+  output_shape.dim(1) = height / block_size;
+  output_shape.dim(2) = width / block_size;
+  output_shape.dim(3) = block_size * block_size * depth;
 
-  loco::NodeShape visit(const luci::CircleExpandDims *node) final
+  return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_sparse_to_dense(const luci::CircleSparseToDense *node)
+{
+  loco::TensorShape shape;
   {
-    const loco::DataType S32 = loco::DataType::S32;
-    auto x_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-    if (x_shape.rank() == 0)
+    LUCI_ASSERT(node->output_shape(), "dims input should not be nullptr");
+
+    auto output_shape_node = dynamic_cast<luci::CircleConst *>(node->output_shape());
+    if (output_shape_node != nullptr)
     {
-      // This maybe for unknown shape. We use shape from the node itself.
-      return use_own(node);
+      // Only support node with S32
+      LUCI_ASSERT(output_shape_node->dtype() == loco::DataType::S32,
+                  "Only support int32 CircleConst");
+
+      if (output_shape_node->rank() != 1)
+        INTERNAL_EXN_V("Only support rank 1 CircleConst",
+                       oops::to_uint32(output_shape_node->rank()));
+
+      shape.rank(output_shape_node->size<loco::DataType::S32>());
+
+      for (uint32_t axis = 0; axis < shape.rank(); ++axis)
+      {
+        shape.dim(axis) = output_shape_node->at<loco::DataType::S32>(axis);
+      }
     }
-    auto const_axis = loco::must_cast<luci::CircleConst *>(node->axis());
-    LUCI_ASSERT(const_axis->dtype() == S32, "Only support int32 CircleConst for axis");
-    if (const_axis->rank() != 0 && const_axis->rank() != 1)
+    else
     {
-      INTERNAL_EXN_V("Non-scalar axis in OP", node->opnum());
+      shape = own_shape(node);
     }
-    int32_t axis = const_axis->at<S32>(0);
-    LUCI_ASSERT((axis <= static_cast<int32_t>(x_shape.rank())) &&
-                    (axis >= -1 - static_cast<int32_t>(x_shape.rank())),
-                "Axis has to be between [-(D+1), D], where D is rank of input.");
-    size_t positive_axis = axis < 0 ? x_shape.rank() + axis + 1 : axis;
-    loco::TensorShape output_shape;
-    output_shape.rank(x_shape.rank() + 1);
-    size_t i = 0;
-    for (; i < positive_axis; i++)
-      output_shape.dim(i) = x_shape.dim(i);
-    output_shape.dim(i) = loco::Dimension(1);
-    for (; i < x_shape.rank(); i++)
-      output_shape.dim(i + 1) = x_shape.dim(i);
-    return loco::NodeShape{output_shape};
   }
 
-  loco::NodeShape visit(const luci::CircleFill *node) final
+  return loco::NodeShape{shape};
+}
+
+loco::NodeShape infer_strided_slice(const luci::CircleStridedSlice *node)
+{
+  auto begin_node = dynamic_cast<luci::CircleConst *>(node->begin());
+  auto end_node = dynamic_cast<luci::CircleConst *>(node->end());
+  auto strides_node = dynamic_cast<luci::CircleConst *>(node->strides());
+
+  if (begin_node == nullptr || end_node == nullptr || strides_node == nullptr)
   {
-    loco::TensorShape shape;
-    {
-      LUCI_ASSERT(node->dims(), "dims input should not be nullptr");
+    return use_own(node);
+  }
 
-      auto dims_node = dynamic_cast<luci::CircleConst *>(node->dims());
-      if (dims_node != nullptr)
-      {
-        // Only support node with S32
-        LUCI_ASSERT(dims_node->dtype() == loco::DataType::S32, "Only support int32 CircleConst");
+  loco::TensorShape shape = infer_output_shape(node);
+  return loco::NodeShape{shape};
+}
 
-        if (dims_node->rank() != 1)
-          INTERNAL_EXN_V("Only support rank 1 CircleConst", oops::to_uint32(dims_node->rank()));
+loco::NodeShape infer_squeeze(const luci::CircleSqueeze *node)
+{
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
 
-        shape.rank(dims_node->dim(0).value());
+  // TODO input shape may be unknown before runtime
+  std::vector<bool> do_squeeze(input_shape.rank(), false);
+  uint32_t num_squeezed = 0;
 
-        for (uint32_t axis = 0; axis < shape.rank(); ++axis)
-        {
-          shape.dim(axis) = dims_node->at<loco::DataType::S32>(axis);
-        }
+  if (!node->squeeze_dims().empty())
+  {
+    // SqueezeDims not empty, squeeze only dims specified
+    for (int32_t raw_dim : node->squeeze_dims())
+    {
+      int32_t dim = raw_dim < 0 ? raw_dim + input_shape.rank() : raw_dim;
+
+      if (dim < 0 || static_cast<uint32_t>(dim) >= input_shape.rank() ||
+          input_shape.dim(dim).value() != 1)
+      {
+        INTERNAL_EXN("invalid dimention specified to Squeeze");
       }
-      else
+
+      if (!do_squeeze[dim])
+        ++num_squeezed;
+      do_squeeze[dim] = true;
+    }
+  }
+  else
+  {
+    // SqueezeDims empty, squeeze any dims with size == 1
+    for (uint32_t dim = 0; dim < input_shape.rank(); ++dim)
+    {
+      if (input_shape.dim(dim) == 1)
       {
-        shape = own_shape(node);
+        do_squeeze[dim] = true;
+        ++num_squeezed;
       }
     }
+  }
 
-    return loco::NodeShape{shape};
+  loco::TensorShape output_shape;
+  output_shape.rank(input_shape.rank() - num_squeezed);
+
+  for (uint32_t in_dim = 0, out_dim = 0; in_dim < input_shape.rank(); ++in_dim)
+  {
+    if (!do_squeeze[in_dim])
+    {
+      output_shape.dim(out_dim++) = input_shape.dim(in_dim);
+    }
   }
 
-  loco::NodeShape visit(const luci::CircleFloor *node) final { return use_x(node); }
+  return loco::NodeShape{output_shape};
+}
 
-  loco::NodeShape visit(const luci::CircleFloorDiv *node) final { return broadcast_xy(node); }
+loco::NodeShape infer_tile(const luci::CircleTile *node)
+{
+  const loco::DataType S32 = loco::DataType::S32;
 
-  loco::NodeShape visit(const luci::CircleFloorMod *node) final { return broadcast_xy(node); }
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+  auto multiples = loco::must_cast<luci::CircleConst *>(node->multiples());
 
-  loco::NodeShape visit(const luci::CircleFullyConnected *node) final
-  {
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-    auto weights_shape = loco::shape_get(node->weights()).as<loco::TensorShape>();
+  // TODO support non-const case
+  // TODO support S64 type
+  LUCI_ASSERT(multiples->dtype() == S32, "Only support int32 multiples");
+  LUCI_ASSERT(multiples->rank() == 1, "multiples should be rank 1")
 
-    // Checking shape capability for fully connected layer
-    // Input: a tensor of at least rank 2 [D1, D2, ... Dn]
-    // Weight: [# of units, K]
-    // Output: [D1 * D2 * ... * Dn / K, # of units]
-    if (input_shape.rank() < 2 || weights_shape.rank() != 2)
-    {
-      // Return node own shape if shape inference is not possible
-      return use_own(node);
-    }
+  uint32_t n = multiples->dim(0).value();
 
-    uint32_t input_size = 1;
-    for (uint32_t i = 0; i < input_shape.rank(); i++)
-    {
-      input_size = input_size * input_shape.dim(i).value();
-    }
-    const uint32_t batch_size = input_size / weights_shape.dim(1).value();
-    loco::TensorShape out_shape;
-    out_shape.rank(2);
-    out_shape.dim(0) = batch_size;
-    out_shape.dim(1) = weights_shape.dim(0);
+  LUCI_ASSERT(n == input_shape.rank(), "length of multiples should be the same with input rank");
 
-    return loco::NodeShape{out_shape};
-  }
+  loco::TensorShape output_shape;
 
-  loco::NodeShape visit(const luci::CircleGather *node) final
+  output_shape.rank(input_shape.rank());
+  for (uint32_t ni = 0; ni < n; ++ni)
   {
-    loco::TensorShape output_shape;
+    int32_t multiple = multiples->at<S32>(ni);
+    output_shape.dim(ni) = input_shape.dim(ni).value() * static_cast<uint32_t>(multiple);
+  }
 
-    const auto input_shape = loco::shape_get(node->params()).as<loco::TensorShape>();
-    const auto positions_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
-    int32_t axis = node->axis();
+  return loco::NodeShape{output_shape};
+}
 
-    // If CircleGather input has a dynamic shape, it can't inference this shape. So, it returns the
-    // shape that node already has.
-    if (input_shape.rank() == 0 || positions_shape.rank() == 0)
-      return use_own(node);
+loco::NodeShape infer_transpose(const luci::CircleTranspose *node)
+{
+  auto input_shape = loco::shape_get(node->a()).as<loco::TensorShape>();
 
-    if (axis < 0)
-      axis += input_shape.rank();
+  auto perm_node = loco::must_cast<luci::CircleConst *>(node->perm());
 
-    output_shape.rank(input_shape.rank() - 1 + positions_shape.rank());
-    int32_t outdim_index = 0;
-    for (int32_t i = 0; i < axis; ++i)
-      output_shape.dim(outdim_index++) = input_shape.dim(i);
-    for (uint32_t i = 0; i < positions_shape.rank(); ++i)
-      output_shape.dim(outdim_index++) = positions_shape.dim(i);
-    for (uint32_t i = axis + 1; i < input_shape.rank(); ++i)
-      output_shape.dim(outdim_index++) = input_shape.dim(i);
+  loco::TensorShape output_shape;
+  output_shape.rank(input_shape.rank());
 
-    return loco::NodeShape{output_shape};
-  }
+  assert(perm_node->dtype() == loco::DataType::S32);
+  assert(input_shape.rank() == perm_node->template size<loco::DataType::S32>());
 
-  loco::NodeShape visit(const luci::CircleGatherNd *node) final
+  for (uint32_t out_axis = 0; out_axis < output_shape.rank(); out_axis++)
   {
-    loco::TensorShape output_shape;
+    auto in_axis = perm_node->template at<loco::DataType::S32>(out_axis);
+    output_shape.dim(out_axis) = input_shape.dim(in_axis);
+  }
 
-    const auto params_shape = loco::shape_get(node->params()).as<loco::TensorShape>();
-    const auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
+  return output_shape;
+}
 
-    const auto params_rank = params_shape.rank();
-    const auto indices_rank = indices_shape.rank();
+loco::NodeShape infer_transpose_conv(const luci::CircleTransposeConv *node)
+{
+  // TransposeConv's output shape is written in its 'inputSizes' argument
+  auto input_sizes_const = loco::must_cast<luci::CircleConst *>(node->inputSizes());
+  // TODO support non-const type
+  LUCI_ASSERT(input_sizes_const->dtype() == loco::DataType::S32, "Only support S32 dtype")
+  LUCI_ASSERT(input_sizes_const->rank() == 1 && input_sizes_const->dim(0).value() == 4,
+              "Only support rank 1 with 4 entries")
 
-    // see https://www.tensorflow.org/api_docs/python/tf/gather_nd
-    // output.shape = indices.shape[:-1] + params.shape[indices.shape[-1]:]
-    // batch_dims isn't supported in tflite
+  loco::TensorShape shape;
 
-    // TODO: replace exceptions with setting shape to unknown?
+  shape.rank(4);
+  for (uint32_t axis = 0; axis < 4; ++axis)
+    shape.dim(axis) = input_sizes_const->at<loco::DataType::S32>(axis);
 
-    if (!indices_shape.dim(indices_rank - 1).known())
-      INTERNAL_EXN("Last indices dimension is unknown");
+  return loco::NodeShape{shape};
+}
 
-    auto indices_last_dim = indices_shape.dim(indices_rank - 1).value();
+loco::NodeShape infer_unpack(const luci::CircleUnpack *node)
+{
+  // CircleUnpack provides list(array) of Tensors which has one less dimension of the input
+  // We'll set shape of CircleUnpack to shape of actual outputs
+  // TODO fix this if any problem rises
+  auto value_shape = loco::shape_get(node->value()).as<loco::TensorShape>();
 
-    if (indices_last_dim > params_rank)
-      INTERNAL_EXN("Last indices dimension should be <= params rank");
+  auto axis = node->axis();
+  auto num = node->num();
+  auto rank = static_cast<int32_t>(value_shape.rank());
 
-    const uint32_t output_rank = indices_rank + params_rank - indices_last_dim - 1;
+  if (rank == 0)
+  {
+    // Unknown shape
+    return use_own(node);
+  }
 
-    output_shape.rank(output_rank);
+  LUCI_ASSERT(-rank <= axis && axis < rank, "Axis is out of range");
 
-    uint32_t output_index = 0;
-    for (uint32_t i = 0; i < indices_rank - 1; ++i)
-    {
-      auto &dim = indices_shape.dim(i);
-      if (!dim.known())
-        INTERNAL_EXN("Unknown indices dimension is unsupported");
-      output_shape.dim(output_index++).set(dim.value());
-    }
+  if (axis < 0)
+    axis += rank;
 
-    for (uint32_t i = indices_last_dim; i < params_rank; ++i)
-    {
-      auto &dim = params_shape.dim(i);
-      if (!dim.known())
-        INTERNAL_EXN("Unknown params dimension is unsupported");
-      output_shape.dim(output_index++).set(dim.value());
-    }
+  LUCI_ASSERT(num == static_cast<int32_t>(value_shape.dim(axis).value()),
+              "num, axis maybe incorrect");
 
-    return loco::NodeShape{output_shape};
+  loco::TensorShape output_shape;
+  output_shape.rank(rank - 1);
+
+  for (int32_t i = 0, o = 0; i < rank; ++i)
+  {
+    if (i != axis)
+      output_shape.dim(o++) = value_shape.dim(i);
   }
 
-  loco::NodeShape visit(const luci::CircleGreater *node) final { return broadcast_xy(node); }
+  return loco::NodeShape{output_shape};
+}
 
-  loco::NodeShape visit(const luci::CircleGreaterEqual *node) final { return broadcast_xy(node); }
+loco::NodeShape infer_unique(const luci::CircleUnique *node)
+{
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
 
-  loco::NodeShape visit(const luci::CircleIf *node) final
+  assert(input_shape.rank() == 1);
+
+  loco::TensorShape shape_output;
+  shape_output = own_shape(node);
+
+  return loco::NodeShape{shape_output};
+}
+
+// Circle Only
+loco::NodeShape infer_bcq_fully_connected(const luci::CircleBCQFullyConnected *node)
+{
+  loco::TensorShape out_shape;
+
+  auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+  auto weights_clusters = loco::must_cast<luci::CircleConst *>(node->weights_clusters());
+
+  LUCI_ASSERT(input_shape.rank() == 2, "Input rank of BCQFullyConnected should be 2");
+
+  int32_t qbits_sum = 0;
+  for (uint32_t i = 0; i < weights_clusters->dim(0).value(); ++i)
   {
-    // Shape of CircleIf is not used. Just use input 0
-    assert(node->input_count() > 0);
-    const auto input_shape = loco::shape_get(node->input(0)).as<loco::TensorShape>();
-    return loco::NodeShape{input_shape};
+    qbits_sum += weights_clusters->at<loco::DataType::S32>(i * 2 + 1);
   }
 
-  loco::NodeShape visit(const luci::CircleL2Normalize *node) final { return use_x(node); }
+  out_shape.rank(2);
+  out_shape.dim(0) = qbits_sum;
+  out_shape.dim(1) = input_shape.dim(1);
 
-  loco::NodeShape visit(const luci::CircleL2Pool2D *node) final
+  return loco::NodeShape{out_shape};
+}
+
+loco::NodeShape infer_bcq_gather(const luci::CircleBCQGather *node)
+{
+  loco::TensorShape input_shape;
+  loco::TensorShape output_shape;
+
+  const auto input_binary_shape = loco::shape_get(node->input_binary()).as<loco::TensorShape>();
+  const auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
+  auto axis = node->axis();
+
+  auto input_clusters = loco::must_cast<luci::CircleConst *>(node->input_clusters());
+  auto qbits_sum = 0;
+  for (uint32_t i = 0; i < input_clusters->dim(0).value(); ++i)
   {
-    return infer_pool_2d_shape(node);
+    qbits_sum += input_clusters->at<loco::DataType::S32>(i * 2 + 1);
   }
 
-  loco::NodeShape visit(const luci::CircleLeakyRelu *node) final
+  input_shape.rank(2);
+  input_shape.dim(0) = qbits_sum;
+  input_shape.dim(1) = input_binary_shape.dim(1).value() * 32;
+
+  output_shape.rank(input_shape.rank() - 1 + indices_shape.rank());
+  int32_t outdim_index = 0;
+  for (int32_t i = 0; i < axis; ++i)
+    output_shape.dim(outdim_index++) = input_shape.dim(i);
+  for (uint32_t i = 0; i < indices_shape.rank(); ++i)
+    output_shape.dim(outdim_index++) = indices_shape.dim(i);
+  for (uint32_t i = axis + 1; i < input_shape.rank(); ++i)
+    output_shape.dim(outdim_index++) = input_shape.dim(i);
+
+  return loco::NodeShape{output_shape};
+}
+
+// Virtual
+loco::NodeShape infer_input(const luci::CircleInput *node)
+{
+  loco::TensorShape shape;
+
+  shape.rank(node->rank());
+  for (uint32_t axis = 0; axis < node->rank(); axis++)
+    shape.dim(axis) = node->dim(axis);
+
+  return loco::NodeShape{shape};
+}
+
+loco::NodeShape infer_output(const luci::CircleOutput *node)
+{
+  // The shape comes from the graph's output entry found at this node's index
+  const auto output_index = node->index();
+  auto graph_output = node->graph()->outputs()->at(output_index);
+
+  return loco::NodeShape{*graph_output->shape()};
+}
+
+loco::NodeShape infer_if_out(const luci::CircleIfOut *node)
+{
+  /**
+   * @note  The returned shape is that of the matching "then" Graph Output;
+   *        the "else" Graph Output is only cross-checked with assert.
+   */
+  auto circle_if = dynamic_cast<const luci::CircleIf *>(node->input());
+  if (circle_if == nullptr)
   {
-    const auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
-    return loco::NodeShape{input_shape};
+    INTERNAL_EXN("CircleIf IR is not configured correctly");
   }
 
-  loco::NodeShape visit(const luci::CircleLess *node) final { return broadcast_xy(node); }
+  auto index = node->index(); // position used to pick the then/else output nodes
+  auto then_graph = circle_if->then_graph();
+  auto else_graph = circle_if->else_graph();
+  assert(then_graph != nullptr);
+  assert(else_graph != nullptr);
 
-  loco::NodeShape visit(const luci::CircleLessEqual *node) final { return broadcast_xy(node); }
+  // then/else outputs are assumed to have the same shape and type;
+  // these are checked at post_import_graph() in Import
+  auto then_outputs = loco::output_nodes(then_graph);
+  auto else_outputs = loco::output_nodes(else_graph);
+  assert(then_outputs.size() == else_outputs.size());
+  assert(index < static_cast<int32_t>(then_outputs.size()));
 
-  loco::NodeShape visit(const luci::CircleLocalResponseNormalization *node) final
+  auto then_out = loco::must_cast<luci::CircleOutput *>(then_outputs.at(index));
+  auto else_out = loco::must_cast<luci::CircleOutput *>(else_outputs.at(index));
+
+  auto then_graph_outputs = then_graph->outputs(); // loco::GraphOutput items
+  auto else_graph_outputs = else_graph->outputs();
+  assert(then_graph_outputs->size() == else_graph_outputs->size());
+
+  auto then_graph_output = then_graph_outputs->at(then_out->index());
+  auto else_graph_output = else_graph_outputs->at(else_out->index());
+  (void)else_graph_output; // only read by the assert below; avoids an unused-variable warning
+  assert(*then_graph_output->shape() == *else_graph_output->shape());
+
+  return loco::NodeShape{*then_graph_output->shape()};
+}
+
+loco::NodeShape infer_non_max_suppression_v4_out(const luci::CircleNonMaxSuppressionV4Out *node)
+{
+  // Output 0 is sized by a constant max_output_size (else unknown); output 1 is TensorShape({0})
+  const loco::DataType S32 = loco::DataType::S32;
+
+  auto nms = dynamic_cast<const luci::CircleNonMaxSuppressionV4 *>(node->input());
+  if (nms == nullptr)
+    INTERNAL_EXN("CircleNonMaxSuppressionV4 IR is not configured correctly");
+
+  const auto out_index = node->index();
+  if (out_index == 1)
+    return loco::TensorShape({0});
+  assert(out_index == 0);
+
+  // Fallback used when max_output_size is not a CircleConst or holds no element
+  auto unknown_length = loco::TensorShape{loco::Dimension()};
+
+  auto size_const = dynamic_cast<const luci::CircleConst *>(nms->max_output_size());
+  if (size_const == nullptr)
+    return unknown_length;
+
+  LUCI_ASSERT(size_const->dtype() == S32, "Only support int32 for max_output_size");
+  if (size_const->size<S32>() < 1)
+    return unknown_length;
+
+  return loco::TensorShape{uint32_t(size_const->at<S32>(0))};
+}
+
+loco::NodeShape infer_non_max_suppression_v5_out(const luci::CircleNonMaxSuppressionV5Out *node)
+{
+  // Outputs 0 and 1 are sized by a constant max_output_size (else unknown); 2 is TensorShape({0})
+  const loco::DataType S32 = loco::DataType::S32;
+
+  auto nms = dynamic_cast<const luci::CircleNonMaxSuppressionV5 *>(node->input());
+  if (nms == nullptr)
+    INTERNAL_EXN("CircleNonMaxSuppressionV5 IR is not configured correctly");
+
+  const auto out_index = node->index();
+  if (out_index == 2)
+    return loco::TensorShape({0});
+  assert(out_index == 0 || out_index == 1);
+
+  // Fallback used when max_output_size is not a CircleConst or holds no element
+  auto unknown_length = loco::TensorShape{loco::Dimension()};
+
+  auto size_const = dynamic_cast<const luci::CircleConst *>(nms->max_output_size());
+  if (size_const == nullptr)
+    return unknown_length;
+
+  LUCI_ASSERT(size_const->dtype() == S32, "Only support int32 for max_output_size");
+  if (size_const->size<S32>() < 1)
+    return unknown_length;
+
+  return loco::TensorShape{uint32_t(size_const->at<S32>(0))};
+}
+
+loco::NodeShape infer_split_out(const luci::CircleSplitOut *node)
+{
+  // Every SplitOut takes the Split node's shape with the split axis divided by num_split.
+  const loco::DataType S32 = loco::DataType::S32;
+
+  auto split = dynamic_cast<const luci::CircleSplit *>(node->input());
+  if (split == nullptr)
+    INTERNAL_EXN("CircleSplit IR is not configured correctly");
+
+  loco::NodeShape unknown;
+  auto source_shape = loco::shape_get(split).as<loco::TensorShape>();
+
+  // split_dim has to be a CircleConst, otherwise an unknown shape is returned
+  auto axis_const = dynamic_cast<const luci::CircleConst *>(split->split_dim());
+  if (axis_const == nullptr)
+    return unknown;
+  LUCI_ASSERT(axis_const->dtype() == S32, "Only support int32 for split_dim");
+  assert(axis_const->size<S32>() == 1);
+
+  // A negative axis is offset by the rank
+  auto axis = axis_const->at<S32>(0);
+  if (axis < 0)
+    axis += source_shape.rank();
+
+  const auto axis_extent = source_shape.dim(axis).value();
+  const auto pieces = split->num_split();
+  assert(axis_extent % pieces == 0);
+  const int piece_extent = axis_extent / pieces;
+
+  loco::TensorShape output_shape = source_shape;
+  output_shape.dim(axis) = loco::Dimension(piece_extent);
+  return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_split_v_out(const luci::CircleSplitVOut *node)
+{
+  // Shape of one SplitV output: the SplitV node's shape with the split axis set to this slice
+  const loco::DataType S32 = loco::DataType::S32;
+  auto split = dynamic_cast<const luci::CircleSplitV *>(node->input());
+  if (split == nullptr)
+    INTERNAL_EXN("CircleSplitV IR is not configured correctly");
+
+  loco::NodeShape unknown;
+
+  auto split_shape = loco::shape_get(split).as<loco::TensorShape>();
+
+  auto size_splits = dynamic_cast<const luci::CircleConst *>(split->size_splits());
+  if (size_splits == nullptr)
+    return unknown; // we need CircleConst for size_splits
+  LUCI_ASSERT(size_splits->dtype() == S32, "Only support int32 for size_splits");
+
+  auto split_dim = dynamic_cast<const luci::CircleConst *>(split->split_dim());
+  if (split_dim == nullptr)
+    return unknown; // we need CircleConst for split_dim
+  LUCI_ASSERT(split_dim->dtype() == S32, "Only support int32 for split_dim");
+
+  // fetch axis (a negative axis is offset by the rank)
+  assert(split_dim->size<S32>() == 1);
+  auto split_dim_axis = split_dim->at<S32>(0);
+  if (split_dim_axis < 0)
+    split_dim_axis += split_shape.rank();
+
+  // interpret size_splits values: count the -1 entries and sum the explicit sizes
+  int32_t size_splits_count = static_cast<int32_t>(size_splits->size<S32>());
+  assert(size_splits_count == split->num_split());
+
+  int64_t minus_one_count = 0, size_splits_sum = 0;
+  for (int32_t idx = 0; idx < size_splits_count; ++idx)
   {
-    const auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-    return loco::NodeShape{input_shape};
+    auto size = size_splits->at<S32>(idx);
+    assert(size >= -1);
+    if (size == -1)
+      ++minus_one_count;
+    else
+      size_splits_sum += size;
   }
+  if (minus_one_count > 1)
+    INTERNAL_EXN("CircleSplitV size_splits has more than one -1 value");
 
-  loco::NodeShape visit(const luci::CircleLog *node) final { return use_x(node); }
+  // calculate this SplitVOut shape
+  auto input_size = split_shape.dim(split_dim_axis).value();
+  assert(size_splits_sum <= input_size);
 
-  loco::NodeShape visit(const luci::CircleLogicalAnd *node) final { return use_x(node); }
+  auto index_this = node->index();
+  assert(0 <= index_this && index_this < split->num_split());
+  auto split_depth = size_splits->at<S32>(index_this);
+  if (split_depth == -1) // the -1 entry takes what is left after the explicit sizes
+    split_depth = input_size - size_splits_sum;
 
-  loco::NodeShape visit(const luci::CircleLogicalNot *node) final { return use_x(node); }
+  loco::TensorShape output_shape = split_shape;
 
-  loco::NodeShape visit(const luci::CircleLogicalOr *node) final { return use_x(node); }
+  output_shape.dim(split_dim_axis) = loco::Dimension(split_depth);
 
-  loco::NodeShape visit(const luci::CircleLogistic *node) final { return use_x(node); }
+  return loco::NodeShape{output_shape};
+}
 
-  loco::NodeShape visit(const luci::CircleMatrixSetDiag *node) final
-  {
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-    auto diagonal_shape = loco::shape_get(node->diagonal()).as<loco::TensorShape>();
+loco::NodeShape infer_top_k_v2_out(const luci::CircleTopKV2Out *node)
+{
+  const loco::DataType S32 = loco::DataType::S32;
 
-    auto rank = diagonal_shape.rank();
+  auto topkv2 = dynamic_cast<const luci::CircleTopKV2 *>(node->input());
+  if (topkv2 == nullptr)
+    INTERNAL_EXN("CircleTopKV2 IR is not configured correctly");
 
-    LUCI_ASSERT(rank == input_shape.rank() - 1, "diagonal rank = input rank - 1");
+  // start from topkv2's shape (same as topkv2->input()); only the last axis changes, to k
+  auto input_shape = loco::shape_get(topkv2).as<loco::TensorShape>();
 
-    for (uint32_t i = 0; i < rank - 1; i++)
-    {
-      LUCI_ASSERT(diagonal_shape.dim(i) == input_shape.dim(i), "diagonal dims = input dims");
-    }
+  auto node_k = loco::must_cast<const luci::CircleConst *>(topkv2->k()); // k must be constant
+  LUCI_ASSERT(node_k->dtype() == S32, "Only support Int32");
+  assert(node_k->size<S32>() == 1);
 
-    auto dim = std::min(input_shape.dim(rank - 1).value(), input_shape.dim(rank).value());
+  loco::TensorShape output_shape;
 
-    LUCI_ASSERT(dim == diagonal_shape.dim(rank - 1), "Max diag len error");
+  output_shape.rank(input_shape.rank());
+  for (uint32_t idx = 0; idx < input_shape.rank() - 1; ++idx) // copy all but the last axis
+  {
+    output_shape.dim(idx) = input_shape.dim(idx);
+  }
+  output_shape.dim(input_shape.rank() - 1) = node_k->at<S32>(0); // last axis becomes k
 
-    return loco::NodeShape{output_shape};
+  return loco::NodeShape{output_shape};
+}
+
+loco::NodeShape infer_unique_out(const luci::CircleUniqueOut *node)
+{
+  if (node->index() == 0) // output 0: return what own_shape(node) yields
+  {
+    auto unique_shape = own_shape(node);
+    return loco::NodeShape{unique_shape};
   }
+  assert(node->index() == 1); // output 1: same length as the input of the Unique node
+  auto unique = loco::must_cast<luci::CircleUnique *>(node->input());
+  auto unique_shape = loco::shape_get(unique->input()).as<loco::TensorShape>();
 
-  loco::NodeShape visit(const luci::CircleLogSoftmax *node) final { return use_logits(node); }
+  assert(unique_shape.rank() == 1); // the Unique input is expected to be rank 1
 
-  loco::NodeShape visit(const luci::CircleMatrixDiag *node) final
+  loco::TensorShape shape_output;
+  shape_output.rank(1);
+  shape_output.dim(0) = unique_shape.dim(0);
+  return loco::NodeShape{shape_output};
+}
+
+loco::NodeShape infer_unpack_out(const luci::CircleUnpackOut *node)
+{
+  auto unpack = dynamic_cast<const luci::CircleUnpack *>(node->input());
+  if (unpack == nullptr) // the input of an UnpackOut must be a CircleUnpack
   {
-    loco::TensorShape output_shape;
+    INTERNAL_EXN("CircleUnpack IR is not configured correctly");
+  }
 
-    auto diagonal_shape = loco::shape_get(node->diagonal()).as<loco::TensorShape>();
-    auto rank = diagonal_shape.rank();
+  auto unpack_shape = loco::shape_get(unpack).as<loco::TensorShape>(); // shape of the Unpack node
 
-    output_shape.rank(rank + 1);
+  return loco::NodeShape{unpack_shape}; // returned unchanged for this output
+}
 
-    for (uint32_t i = 0; i < rank; i++)
-    {
-      output_shape.dim(i) = diagonal_shape.dim(i);
-    }
+loco::NodeShape infer_while_out(const luci::CircleWhileOut *node)
+{
+  /**
+   * @note  The node's own shape is returned; the matching "cond" Graph input
+   *        is only compared against it and a warning is logged on mismatch.
+   */
+  auto circle_while = dynamic_cast<const luci::CircleWhile *>(node->input());
+  if (circle_while == nullptr)
+  {
+    INTERNAL_EXN("CircleWhile IR is not configured correctly");
+  }
 
-    output_shape.dim(rank) = diagonal_shape.dim(rank - 1);
+  auto index = node->index();
+  auto cond_graph = circle_while->cond_graph();
+  assert(cond_graph != nullptr);
 
-    return loco::NodeShape{output_shape};
+  // Assumption: the index of CircleWhileOut matches with the index of input nodes returned by
+  // loco::input_nodes
+  auto cond_inputs = loco::input_nodes(cond_graph);
+  auto cond_in = loco::must_cast<luci::CircleInput *>(cond_inputs.at(index));
+
+  auto cond_graph_inputs = cond_graph->inputs();
+  auto cond_graph_input = cond_graph_inputs->at(cond_in->index());
+
+  auto cond_graph_input_shape = *cond_graph_input->shape();
+  auto this_shape = own_shape(node);
+
+  if (!(this_shape == cond_graph_input_shape)) // a mismatch is reported but not treated as an error
+  {
+    LOGGER(l);
+    WARN(l) << "Warning: CircleWhileOut '" << node->name() << "' shape mismatch " << this_shape
+            << " vs " << cond_graph_input_shape;
   }
 
-  loco::NodeShape visit(const luci::CircleMaximum *node) final { return broadcast_xy(node); }
+  return loco::NodeShape{this_shape};
+}
 
-  loco::NodeShape visit(const luci::CircleMaxPool2D *node) final
+/**
+ * @brief Class to infer the shape of CircleNode
+ *
+ * @note All CircleNode's inputs and outputs are always loco::Domain::Tensor
+ */
+class ShapeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::NodeShape>
+{
+public:
+  loco::NodeShape visit(const luci::CircleAbs *node) final { return use_x(node); }
+
+  loco::NodeShape visit(const luci::CircleAdd *node) final { return broadcast_xy(node); }
+
+  loco::NodeShape visit(const luci::CircleAddN *node) final { return infer_add_n(node); }
+
+  loco::NodeShape visit(const luci::CircleArgMax *node) final { return infer_arg_max(node); }
+
+  loco::NodeShape visit(const luci::CircleArgMin *node) final { return infer_arg_min(node); }
+
+  loco::NodeShape visit(const luci::CircleAveragePool2D *node) final
   {
     return infer_pool_2d_shape(node);
   }
 
-  loco::NodeShape visit(const luci::CircleMean *node) final
+  loco::NodeShape visit(const luci::CircleBatchMatMul *node) final
   {
-    auto output_shape = infer_reducer(node->input(), node->reduction_indices(), node->keep_dims());
-    return loco::NodeShape{output_shape};
-  }
+    auto x_shape = loco::shape_get(node->x()).as<loco::TensorShape>();
+    auto y_shape = loco::shape_get(node->y()).as<loco::TensorShape>();
 
-  loco::NodeShape visit(const luci::CircleMinimum *node) final { return broadcast_xy(node); }
+    return infer_batchmatmul_shape(x_shape, y_shape, node->adj_x(), node->adj_y());
+  }
 
-  loco::NodeShape visit(const luci::CircleMirrorPad *node) final
+  loco::NodeShape visit(const luci::CircleBatchToSpaceND *node) final
   {
-    const loco::DataType S32 = loco::DataType::S32;
+    return infer_batch_to_space_nd(node);
+  }
 
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-    auto paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
+  loco::NodeShape visit(const luci::CircleCast *node) final { return use_x(node); }
 
-    // TODO support non-const case
-    // TODO support other data type
-    LUCI_ASSERT(paddings->dtype() == S32, "Only support int 32 for now");
-    LUCI_ASSERT(paddings->rank() == 2, "paddings should be rank 2")
+  loco::NodeShape visit(const luci::CircleCeil *node) final { return use_x(node); }
 
-    int32_t n = paddings->dim(0).value();
-    int32_t v = paddings->dim(1).value();
+  loco::NodeShape visit(const luci::CircleConcatenation *node) final
+  {
+    return infer_concatenation(node);
+  }
 
-    LUCI_ASSERT(v == 2, "paddings should be [n, 2]");
-    LUCI_ASSERT(n == int32_t(input_shape.rank()),
-                "paddings [n, 2] should have same value of input rank");
+  loco::NodeShape visit(const luci::CircleConst *node) final { return use_own(node); }
 
-    loco::TensorShape output_shape;
+  loco::NodeShape visit(const luci::CircleConv2D *node) final { return infer_conv2d(node); }
 
-    output_shape.rank(input_shape.rank());
-    for (int32_t ni = 0; ni < n; ++ni)
-    {
-      int32_t idx = ni * 2;
-      int value = input_shape.dim(ni).value();
-      value += paddings->at<S32>(idx + 0); // left
-      value += paddings->at<S32>(idx + 1); // right
-      output_shape.dim(ni) = value;
-    }
+  loco::NodeShape visit(const luci::CircleCos *node) final { return use_x(node); }
 
-    return loco::NodeShape{output_shape};
+  loco::NodeShape visit(const luci::CircleCustom *node) final { return use_own(node); }
+
+  loco::NodeShape visit(const luci::CircleDepthToSpace *node) final
+  {
+    return infer_depth_to_space(node);
   }
 
-  loco::NodeShape visit(const luci::CircleMul *node) final { return broadcast_xy(node); }
+  loco::NodeShape visit(const luci::CircleDepthwiseConv2D *node) final
+  {
+    return infer_depthwise_conv2d(node);
+  }
 
-  loco::NodeShape visit(const luci::CircleNeg *node) final { return use_x(node); }
+  loco::NodeShape visit(const luci::CircleDiv *node) final { return broadcast_xy(node); }
 
-  loco::NodeShape visit(const luci::CircleNonMaxSuppressionV4 *node) final
+  loco::NodeShape visit(const luci::CircleElu *node) final
   {
-    const auto boxes_shape = loco::shape_get(node->boxes()).as<loco::TensorShape>();
-    return loco::NodeShape{boxes_shape};
+    auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
+
+    return loco::NodeShape{input_shape};
   }
 
-  loco::NodeShape visit(const luci::CircleNotEqual *node) final { return broadcast_xy(node); }
+  loco::NodeShape visit(const luci::CircleEqual *node) final { return broadcast_xy(node); }
 
-  loco::NodeShape visit(const luci::CircleOneHot *node) final
+  loco::NodeShape visit(const luci::CircleExp *node) final { return use_x(node); }
+
+  loco::NodeShape visit(const luci::CircleExpandDims *node) final
   {
-    const loco::DataType S32 = loco::DataType::S32;
-    auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
-    // Only support OneHot node's depth() is CircleConst with type S32
-    // TODO support depth with other types
-    auto depth = loco::must_cast<luci::CircleConst *>(node->depth());
-    LUCI_ASSERT(depth->dtype() == S32, "Only support int32 CircleConst");
-    if (depth->rank() != 0)
-      INTERNAL_EXN_V("Only support rank 0 CircleOneHot in Depth", oops::to_uint32(depth->rank()));
-    loco::TensorShape output_shape;
-    output_shape.rank(indices_shape.rank() + 1);
-    auto axis = node->axis();
-    if (axis < 0)
-      axis += indices_shape.rank() + 1;
-    LUCI_ASSERT(0 <= axis, "Axis is out of range");
-    LUCI_ASSERT(static_cast<uint32_t>(axis) <= indices_shape.rank(), "Axis is out of range");
-    uint32_t j = 0;
-    for (uint32_t i = 0; i < output_shape.rank(); i++)
-    {
-      if (i == static_cast<uint32_t>(axis))
-      {
-        output_shape.dim(i) = depth->at<S32>(0);
-      }
-      else
-      {
-        output_shape.dim(i) = indices_shape.dim(j++);
-      }
-    }
-    return loco::NodeShape{output_shape};
+    return infer_expand_dims(node);
   }
 
-  loco::NodeShape visit(const luci::CirclePack *node) final
+  loco::NodeShape visit(const luci::CircleFill *node) final { return infer_fill(node); }
+
+  loco::NodeShape visit(const luci::CircleFloor *node) final { return use_x(node); }
+
+  loco::NodeShape visit(const luci::CircleFloorDiv *node) final { return broadcast_xy(node); }
+
+  loco::NodeShape visit(const luci::CircleFloorMod *node) final { return broadcast_xy(node); }
+
+  loco::NodeShape visit(const luci::CircleFullyConnected *node) final
   {
-    LUCI_ASSERT(node->values_count() > 0, "Only support one or more inputs");
+    return infer_fully_connected(node);
+  }
 
-    auto first_shape = loco::shape_get(node->values(0)).as<loco::TensorShape>();
-    // Make sure all inputs have the same shape.
-    for (uint32_t i = 1; i < node->values_count(); ++i)
-    {
-      auto in_shape = loco::shape_get(node->values(i)).as<loco::TensorShape>();
-      LUCI_ASSERT(loco::NodeShape{first_shape} == loco::NodeShape{in_shape},
-                  "All inputs must have the same shape");
-    }
+  loco::NodeShape visit(const luci::CircleGather *node) final { return infer_gather(node); }
 
-    // Checking shape capability for pack layer
-    // Input: tensors [D1, D2, ... Dn]
-    // Axis: K
-    // Output: [D1, D2, ... , D_K-1, n, D_K+1, ... Dn]
-    auto axis = node->axis();
-    if (axis < 0)
-      axis += first_shape.rank() + 1;
+  loco::NodeShape visit(const luci::CircleGatherNd *node) final { return infer_gather_nd(node); }
 
-    LUCI_ASSERT(0 <= axis, "Axis is out of range");
-    LUCI_ASSERT(static_cast<uint32_t>(axis) <= first_shape.rank(), "Axis is out of range");
+  loco::NodeShape visit(const luci::CircleGreater *node) final { return broadcast_xy(node); }
 
-    loco::TensorShape output_shape;
-    output_shape.rank(first_shape.rank() + 1);
+  loco::NodeShape visit(const luci::CircleGreaterEqual *node) final { return broadcast_xy(node); }
 
-    uint32_t j = 0;
-    for (uint32_t i = 0; i < output_shape.rank(); ++i)
-    {
-      if (i == static_cast<uint32_t>(axis))
-      {
-        output_shape.dim(i) = node->values_count();
-      }
-      else
-      {
-        output_shape.dim(i) = first_shape.dim(j++);
-      }
-    }
+  loco::NodeShape visit(const luci::CircleIf *node) final
+  {
+    // Shape of CircleIf is not used. Just use input 0
+    assert(node->input_count() > 0);
+    const auto input_shape = loco::shape_get(node->input(0)).as<loco::TensorShape>();
+    return loco::NodeShape{input_shape};
+  }
 
-    return loco::NodeShape{output_shape};
+  loco::NodeShape visit(const luci::CircleL2Normalize *node) final { return use_x(node); }
+
+  loco::NodeShape visit(const luci::CircleL2Pool2D *node) final
+  {
+    return infer_pool_2d_shape(node);
   }
 
-  loco::NodeShape visit(const luci::CirclePad *node) final
+  loco::NodeShape visit(const luci::CircleLeakyRelu *node) final
   {
-    const loco::DataType S32 = loco::DataType::S32;
+    const auto input_shape = loco::shape_get(node->features()).as<loco::TensorShape>();
+    return loco::NodeShape{input_shape};
+  }
 
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-    auto paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
+  loco::NodeShape visit(const luci::CircleLess *node) final { return broadcast_xy(node); }
 
-    // TODO support non-const case
-    // TODO support other data type
-    LUCI_ASSERT(paddings->dtype() == S32, "Only support int 32 for now");
-    LUCI_ASSERT(paddings->rank() == 2, "paddings should be rank 2")
+  loco::NodeShape visit(const luci::CircleLessEqual *node) final { return broadcast_xy(node); }
 
-    int32_t n = paddings->dim(0).value();
-    int32_t v = paddings->dim(1).value();
+  loco::NodeShape visit(const luci::CircleLocalResponseNormalization *node) final
+  {
+    const auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
+    return loco::NodeShape{input_shape};
+  }
 
-    LUCI_ASSERT(v == 2, "paddings should be [n, 2]");
-    LUCI_ASSERT(n == int32_t(input_shape.rank()),
-                "paddings [n, 2] should have same value of input rank");
+  loco::NodeShape visit(const luci::CircleLog *node) final { return use_x(node); }
 
-    loco::TensorShape output_shape;
+  loco::NodeShape visit(const luci::CircleLogicalAnd *node) final { return use_x(node); }
 
-    output_shape.rank(input_shape.rank());
-    for (int32_t ni = 0; ni < n; ++ni)
-    {
-      int32_t idx = ni * 2;
-      int value = input_shape.dim(ni).value();
-      value += paddings->at<S32>(idx + 0); // left
-      value += paddings->at<S32>(idx + 1); // right
-      output_shape.dim(ni) = value;
-    }
+  loco::NodeShape visit(const luci::CircleLogicalNot *node) final { return use_x(node); }
 
-    return loco::NodeShape{output_shape};
-  }
+  loco::NodeShape visit(const luci::CircleLogicalOr *node) final { return use_x(node); }
 
-  loco::NodeShape visit(const luci::CirclePow *node) final { return broadcast_xy(node); }
+  loco::NodeShape visit(const luci::CircleLogistic *node) final { return use_x(node); }
+
+  loco::NodeShape visit(const luci::CircleLogSoftmax *node) final { return use_logits(node); }
 
-  loco::NodeShape visit(const luci::CirclePRelu *node) final
+  loco::NodeShape visit(const luci::CircleMatrixDiag *node) final
   {
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-    auto alpha_shape = loco::shape_get(node->alpha()).as<loco::TensorShape>();
+    return infer_matrix_diag(node);
+  }
+
+  loco::NodeShape visit(const luci::CircleMatrixSetDiag *node) final
+  {
+    return infer_matrix_set_diag(node);
+  }
 
-    auto output_shape = broadcast_shape(input_shape, alpha_shape);
+  loco::NodeShape visit(const luci::CircleMaximum *node) final { return broadcast_xy(node); }
+
+  loco::NodeShape visit(const luci::CircleMaxPool2D *node) final
+  {
+    return infer_pool_2d_shape(node);
+  }
 
+  loco::NodeShape visit(const luci::CircleMean *node) final
+  {
+    auto output_shape = infer_reducer(node->input(), node->reduction_indices(), node->keep_dims());
     return loco::NodeShape{output_shape};
   }
 
-  loco::NodeShape visit(const luci::CircleRange *node) final
+  loco::NodeShape visit(const luci::CircleMinimum *node) final { return broadcast_xy(node); }
+
+  loco::NodeShape visit(const luci::CircleMirrorPad *node) final { return infer_mirror_pad(node); }
+
+  loco::NodeShape visit(const luci::CircleMul *node) final { return broadcast_xy(node); }
+
+  loco::NodeShape visit(const luci::CircleNeg *node) final { return use_x(node); }
+
+  loco::NodeShape visit(const luci::CircleNonMaxSuppressionV4 *node) final
   {
-    loco::TensorShape output_shape;
-    output_shape.rank(1);
+    const auto boxes_shape = loco::shape_get(node->boxes()).as<loco::TensorShape>();
+    return loco::NodeShape{boxes_shape};
+  }
 
-    auto start_node = dynamic_cast<luci::CircleConst *>(node->start());
-    auto limit_node = dynamic_cast<luci::CircleConst *>(node->limit());
-    auto delta_node = dynamic_cast<luci::CircleConst *>(node->delta());
+  loco::NodeShape visit(const luci::CircleNonMaxSuppressionV5 *node) final
+  {
+    const auto boxes_shape = loco::shape_get(node->boxes()).as<loco::TensorShape>();
+    return loco::NodeShape{boxes_shape};
+  }
 
-    if (start_node == nullptr || limit_node == nullptr || delta_node == nullptr)
-    {
-      return use_own(node);
-    }
+  loco::NodeShape visit(const luci::CircleNotEqual *node) final { return broadcast_xy(node); }
 
-    double start = 0, limit = 0, delta = 0;
+  loco::NodeShape visit(const luci::CircleOneHot *node) final { return infer_one_hot(node); }
 
-#define GET_RANGE_PARAM(DT)         \
-  start = start_node->scalar<DT>(); \
-  limit = limit_node->scalar<DT>(); \
-  delta = delta_node->scalar<DT>();
+  loco::NodeShape visit(const luci::CirclePack *node) final { return infer_pack(node); }
 
-    switch (start_node->dtype())
-    {
-      case loco::DataType::FLOAT32:
-        GET_RANGE_PARAM(loco::DataType::FLOAT32)
-        break;
-      case loco::DataType::S32:
-        GET_RANGE_PARAM(loco::DataType::S32)
-        break;
-      default:
-        INTERNAL_EXN("Range data type not supported");
-    }
+  loco::NodeShape visit(const luci::CirclePad *node) final { return infer_pad(node); }
 
-#undef GET_RANGE_PARAM
+  loco::NodeShape visit(const luci::CirclePadV2 *node) final { return infer_pad_v2(node); }
 
-    if (delta == 0)
-      INTERNAL_EXN("Delta can not be zero");
+  loco::NodeShape visit(const luci::CirclePow *node) final { return broadcast_xy(node); }
 
-    output_shape.dim(0) = ceil((limit - start) / delta);
+  loco::NodeShape visit(const luci::CirclePRelu *node) final { return infer_p_relu(node); }
 
-    return loco::NodeShape{output_shape};
-  }
+  loco::NodeShape visit(const luci::CircleRange *node) final { return infer_range(node); }
 
   loco::NodeShape visit(const luci::CircleRank *) final
   {
@@ -1242,136 +2248,16 @@ public:
    *
    * TODO Change this policy when not appropriate
    */
-  loco::NodeShape visit(const luci::CircleReshape *node) final
-  {
-    LOGGER(l);
-
-    const loco::DataType S32 = loco::DataType::S32;
-
-    loco::TensorShape shape_by_input;
-    {
-      LUCI_ASSERT(node->shape(), "2nd input shape() should not be nullptr");
-
-      // Only support node's shape() is CircleConst with S32
-      // TODO support other node with other types
-      auto const_shape_node = dynamic_cast<luci::CircleConst *>(node->shape());
-      if (const_shape_node != nullptr)
-      {
-        LUCI_ASSERT(const_shape_node->dtype() == S32, "Only support int32 CircleConst");
-
-        shape_by_input.rank(const_shape_node->size<S32>());
-
-        for (uint32_t axis = 0; axis < shape_by_input.rank(); ++axis)
-        {
-          shape_by_input.dim(axis) = const_shape_node->at<S32>(axis);
-        }
-      }
-      else
-      {
-        // We use shape from the node itself
-        shape_by_input = own_shape(node);
-      }
-    }
-
-    loco::TensorShape shape_by_attr;
-    {
-      shape_by_attr.rank(node->newShape()->rank());
-
-      for (uint32_t axis = 0; axis < shape_by_attr.rank(); ++axis)
-      {
-        shape_by_attr.dim(axis) = node->newShape()->dim(axis);
-      }
-    }
-
-    if (!(shape_by_input == shape_by_attr))
-    {
-      INFO(l) << "CircleReshape: Two new shape information mismatched : " << std::endl;
-      INFO(l) << "   shape_by_input : " << shape_by_input << std::endl;
-      INFO(l) << "   shape_by_attr : " << shape_by_attr << std::endl;
-    }
-
-    loco::TensorShape output_shape = shape_by_input;
-
-    // One of the dimensions can have special value -1, meaning its actual value should be inferred.
-    const auto input_shape = loco::shape_get(node->tensor()).as<loco::TensorShape>();
-    const uint32_t input_element_count = loco::element_count(&input_shape);
-    uint32_t output_element_count = 1;
-    uint32_t unknown_dim_index = UINT32_MAX;
-    for (uint32_t dim_index = 0; dim_index < output_shape.rank(); ++dim_index)
-    {
-      const uint32_t dim_value = output_shape.dim(dim_index).value();
-      if (static_cast<int>(dim_value) == -1)
-      {
-        LUCI_ASSERT(unknown_dim_index == UINT32_MAX, "More than one unknown dimension");
-        unknown_dim_index = dim_index;
-      }
-      else
-      {
-        output_element_count *= dim_value;
-      }
-    }
-    if (unknown_dim_index != UINT32_MAX)
-    {
-      output_shape.dim(unknown_dim_index) = input_element_count / output_element_count;
-    }
-
-    return loco::NodeShape{output_shape};
-  }
+  loco::NodeShape visit(const luci::CircleReshape *node) final { return infer_reshape(node); }
 
   loco::NodeShape visit(const luci::CircleResizeBilinear *node) final
   {
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-
-    if (input_shape.rank() != 4)
-      INTERNAL_EXN("Expected ResizeBilinear input to have rank 4");
-
-    auto *const_node = loco::must_cast<luci::CircleConst *>(node->size());
-
-    if (const_node->dtype() != loco::DataType::S32)
-      INTERNAL_EXN("Only S32 datatype is supported for ResizeBilinear size");
-
-    if (const_node->rank() != 1)
-      INTERNAL_EXN("Expected size tensor of rank 1");
-
-    if (const_node->dim(0).value() != 2)
-      INTERNAL_EXN("Expected size tensor with shape [2]");
-
-    loco::TensorShape output_shape;
-    output_shape.rank(4);
-    output_shape.dim(0) = input_shape.dim(0);
-    output_shape.dim(1) = const_node->at<loco::DataType::S32>(0);
-    output_shape.dim(2) = const_node->at<loco::DataType::S32>(1);
-    output_shape.dim(3) = input_shape.dim(3);
-
-    return loco::NodeShape{output_shape};
+    return infer_resize_bilinear(node);
   }
 
   loco::NodeShape visit(const luci::CircleResizeNearestNeighbor *node) final
   {
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-
-    if (input_shape.rank() != 4)
-      INTERNAL_EXN("Expected ResizeNearesNeighbor input to have rank 4");
-
-    auto *const_node = loco::must_cast<luci::CircleConst *>(node->size());
-
-    if (const_node->dtype() != loco::DataType::S32)
-      INTERNAL_EXN("Only S32 datatype is supported for ResizeNearesNeighbor size");
-
-    if (const_node->rank() != 1)
-      INTERNAL_EXN("Expected size tensor of rank 1");
-
-    if (const_node->dim(0).value() != 2)
-      INTERNAL_EXN("Expected size tensor with shape [2]");
-
-    loco::TensorShape output_shape;
-    output_shape.rank(4);
-    output_shape.dim(0) = input_shape.dim(0);
-    output_shape.dim(1) = const_node->at<loco::DataType::S32>(0);
-    output_shape.dim(2) = const_node->at<loco::DataType::S32>(1);
-    output_shape.dim(3) = input_shape.dim(3);
-
-    return loco::NodeShape{output_shape};
+    return infer_resize_nearest_neighbor(node);
   }
 
   loco::NodeShape visit(const luci::CircleReverseSequence *node) final
@@ -1395,276 +2281,38 @@ public:
 
   loco::NodeShape visit(const luci::CircleRsqrt *node) final { return use_x(node); }
 
-  loco::NodeShape visit(const luci::CircleScatterNd *node) final
-  {
-    loco::TensorShape output_shape;
-
-    auto shape_node = loco::must_cast<luci::CircleConst *>(node->shape());
-
-    const loco::DataType S32 = loco::DataType::S32;
-    const loco::DataType S64 = loco::DataType::S64;
-
-    std::vector<int64_t> vect_shape;
-
-    if (shape_node->dtype() == S32)
-      vect_shape = vector_from_constant<S32>(shape_node);
-    else if (shape_node->dtype() == S64)
-      vect_shape = vector_from_constant<S64>(shape_node);
-    else
-      LUCI_ASSERT(false, "Only support int32/int64 for shape()");
-
-    output_shape.rank(vect_shape.size());
-    for (uint32_t i = 0; i < vect_shape.size(); ++i)
-      output_shape.dim(i) = vect_shape[i];
-
-    return loco::NodeShape{output_shape};
-  }
+  loco::NodeShape visit(const luci::CircleScatterNd *node) final { return infer_scatter_nd(node); }
 
   loco::NodeShape visit(const luci::CircleSegmentSum *node) final
   {
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-    auto segment_shape = loco::shape_get(node->segment_ids()).as<loco::TensorShape>();
-
-    LUCI_ASSERT(segment_shape.rank() == 1, "segment_ids must be 1-D tensor");
-    LUCI_ASSERT(segment_shape.dim(0).value() == input_shape.dim(0).value(),
-                "segment_ids size must be equal to the size of data's first dimension");
-
-    auto ids_shape_value = loco::must_cast<luci::CircleConst *>(node->segment_ids());
-
-    std::vector<int64_t> vect_ids;
-
-    if (ids_shape_value->dtype() == loco::DataType::S32)
-      vect_ids = vector_from_constant<loco::DataType::S32>(ids_shape_value);
-
-    LUCI_ASSERT(std::is_sorted(vect_ids.begin(), vect_ids.end()),
-                "segment_ids values should be sorted")
-
-    loco::TensorShape output_shape;
-
-    output_shape.rank(input_shape.rank());
-
-    for (uint32_t i = 1; i < input_shape.rank(); ++i)
-      output_shape.dim(i) = input_shape.dim(i);
-
-    output_shape.dim(0) = vect_ids.back() + 1;
-
-    return loco::NodeShape{output_shape};
+    return infer_segment_sum(node);
   }
 
-  loco::NodeShape visit(const luci::CircleSelect *node) final
-  {
-    auto t_shape = loco::shape_get(node->t()).as<loco::TensorShape>();
-    assert(t_shape == loco::shape_get(node->e()).as<loco::TensorShape>());
+  loco::NodeShape visit(const luci::CircleSelect *node) final { return infer_select(node); }
 
-    // condition shape validation
-    auto c_shape = loco::shape_get(node->condition()).as<loco::TensorShape>();
-    if (c_shape.rank() != t_shape.rank())
-    {
-      if (c_shape.rank() != 0 && c_shape.rank() != 1)
-        INTERNAL_EXN_V("CircleSelect condition rank is not 0 nor 1: ", c_shape.rank());
+  loco::NodeShape visit(const luci::CircleSelectV2 *node) final { return infer_select_v2(node); }
 
-      if (c_shape.rank() == 1)
-      {
-        if (c_shape.dim(0).value() != t_shape.dim(0).value())
-          INTERNAL_EXN("CircleSelect condition dim(0) should match with t.dim(0)");
-      }
-    }
-
-    return loco::NodeShape{t_shape};
-  }
-
-  loco::NodeShape visit(const luci::CircleSelectV2 *node) final
-  {
-    auto c_shape = loco::shape_get(node->condition()).as<loco::TensorShape>();
-    auto t_shape = loco::shape_get(node->t()).as<loco::TensorShape>();
-    auto e_shape = loco::shape_get(node->e()).as<loco::TensorShape>();
-
-    // validate ability to broadcast shapes to each other
-    auto b_shape = broadcast_shape(broadcast_shape(c_shape, t_shape), e_shape);
-    return loco::NodeShape{b_shape};
-  }
-
-  loco::NodeShape visit(const luci::CircleShape *node) final
-  {
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-
-    loco::TensorShape output_shape;
-
-    output_shape.rank(1);
-    output_shape.dim(0) = input_shape.rank();
-
-    return loco::NodeShape{output_shape};
-  }
+  loco::NodeShape visit(const luci::CircleShape *node) final { return infer_shape(node); }
 
   loco::NodeShape visit(const luci::CircleSin *node) final { return use_x(node); }
 
-  loco::NodeShape visit(const luci::CircleSlice *node) final
-  {
-    const loco::DataType S32 = loco::DataType::S32;
-    const loco::DataType S64 = loco::DataType::S64;
-
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-
-    auto const_begin = loco::must_cast<luci::CircleConst *>(node->begin());
-    auto const_size = loco::must_cast<luci::CircleConst *>(node->size());
-
-    loco::TensorShape output_shape;
-    std::vector<int64_t> vect_begin; // to hold both S32/S64, we use int64_t
-    std::vector<int64_t> vect_size;
-
-    if (const_begin->dtype() == S32)
-      vect_begin = vector_from_constant<S32>(const_begin);
-    else if (const_begin->dtype() == S64)
-      vect_begin = vector_from_constant<S64>(const_begin);
-    else
-      LUCI_ASSERT(false, "Only support int32/int64 for begin()");
-
-    if (const_size->dtype() == S32)
-      vect_size = vector_from_constant<S32>(const_size);
-    else if (const_size->dtype() == S64)
-      vect_size = vector_from_constant<S64>(const_size);
-    else
-      LUCI_ASSERT(false, "Only support int32/int64 for size()");
-
-    assert(input_shape.rank() == vect_begin.size());
-    assert(input_shape.rank() == vect_size.size());
-
-    output_shape.rank(vect_begin.size());
-    for (uint32_t idx = 0; idx < vect_begin.size(); ++idx)
-    {
-      auto size = vect_size.at(idx);
-      if (size == -1)
-      {
-        size = input_shape.dim(idx).value() - vect_begin.at(idx);
-      }
-      output_shape.dim(idx) = size;
-    }
-
-    return loco::NodeShape{output_shape};
-  }
+  loco::NodeShape visit(const luci::CircleSlice *node) final { return infer_slice(node); }
 
   loco::NodeShape visit(const luci::CircleSoftmax *node) final { return use_logits(node); }
 
   loco::NodeShape visit(const luci::CircleSpaceToBatchND *node) final
   {
-    const loco::DataType S32 = loco::DataType::S32;
-
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-    // Support only input rank is 3 and 4
-    assert(input_shape.rank() == 3 || input_shape.rank() == 4);
-
-    // Only support block_shape() with S32 type CircleConst for now
-    auto const_block_shape = loco::must_cast<luci::CircleConst *>(node->block_shape());
-    LUCI_ASSERT(const_block_shape->dtype() == S32, "Only support int32 block_shape");
-
-    // Only support paddings() with S32 type CircleConst for now
-    auto const_paddings = loco::must_cast<luci::CircleConst *>(node->paddings());
-    LUCI_ASSERT(const_paddings->dtype() == S32, "Only support int32 paddings");
-
-    auto const_block_shape_shape = loco::shape_get(const_block_shape).as<loco::TensorShape>();
-    auto const_paddings_shape = loco::shape_get(const_paddings).as<loco::TensorShape>();
-    assert(const_block_shape_shape.rank() == 1);
-    assert(const_paddings_shape.rank() == 2);
-
-    int32_t input_spatial_dim = input_shape.rank() - 2;
-    assert(const_block_shape_shape.dim(0) == input_spatial_dim);
-    assert(const_paddings_shape.dim(0) == input_spatial_dim);
-    assert(const_paddings_shape.dim(1) == 2);
-
-    // Check all values of block_shape >= 1
-    uint32_t ele_count = const_block_shape->size<S32>();
-    for (uint32_t e = 0; e < ele_count; ++e)
-    {
-      auto val = const_block_shape->at<S32>(e);
-      if (val < 1)
-      {
-        INTERNAL_EXN_V("All values of block_shape >= 1: ", e);
-      }
-    }
-
-    loco::TensorShape shape_output;
-
-    shape_output.rank(input_shape.rank());
-
-    int32_t output_batch_size = input_shape.dim(0).value();
-    for (int32_t dim = 0; dim < input_spatial_dim; ++dim)
-    {
-      int dim_size = input_shape.dim(dim + 1).value();
-      dim_size += const_paddings->at<S32>(dim * 2);
-      dim_size += const_paddings->at<S32>(dim * 2 + 1);
-      shape_output.dim(dim + 1) = dim_size / const_block_shape->at<S32>(dim);
-
-      assert(dim_size % const_block_shape->at<S32>(dim) == 0);
-      output_batch_size = output_batch_size * const_block_shape->at<S32>(dim);
-    }
-    shape_output.dim(0) = output_batch_size;
-    shape_output.dim(input_shape.rank() - 1) = input_shape.dim(input_shape.rank() - 1);
-
-    return loco::NodeShape{shape_output};
+    return infer_space_to_batch_nd(node);
   }
 
   loco::NodeShape visit(const luci::CircleSpaceToDepth *node) final
   {
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-    LUCI_ASSERT(input_shape.rank() == 4, "Only input rank 4 is supported");
-
-    // Only data format NHWC is supported
-    int32_t height = input_shape.dim(1).value();
-    int32_t width = input_shape.dim(2).value();
-    int32_t depth = input_shape.dim(3).value();
-
-    int block_size = node->block_size();
-
-    if (block_size < 2)
-      INTERNAL_EXN("Block size must be >= 2");
-
-    if ((height % block_size) || (width % block_size))
-    {
-      INTERNAL_EXN("The input tensor's height and width must be divisible by block_size");
-    }
-
-    loco::TensorShape output_shape;
-    output_shape.rank(4);
-
-    output_shape.dim(0) = input_shape.dim(0).value();
-    output_shape.dim(1) = height / block_size;
-    output_shape.dim(2) = width / block_size;
-    output_shape.dim(3) = block_size * block_size * depth;
-
-    return loco::NodeShape{output_shape};
+    return infer_space_to_depth(node);
   }
 
   loco::NodeShape visit(const luci::CircleSparseToDense *node) final
   {
-    loco::TensorShape shape;
-    {
-      LUCI_ASSERT(node->output_shape(), "dims input should not be nullptr");
-
-      auto output_shape_node = dynamic_cast<luci::CircleConst *>(node->output_shape());
-      if (output_shape_node != nullptr)
-      {
-        // Only support node with S32
-        LUCI_ASSERT(output_shape_node->dtype() == loco::DataType::S32,
-                    "Only support int32 CircleConst");
-
-        if (output_shape_node->rank() != 1)
-          INTERNAL_EXN_V("Only support rank 1 CircleConst",
-                         oops::to_uint32(output_shape_node->rank()));
-
-        shape.rank(output_shape_node->dim(0).value());
-
-        for (uint32_t axis = 0; axis < shape.rank(); ++axis)
-        {
-          shape.dim(axis) = output_shape_node->at<loco::DataType::S32>(axis);
-        }
-      }
-      else
-      {
-        shape = own_shape(node);
-      }
-    }
-
-    return loco::NodeShape{shape};
+    return infer_sparse_to_dense(node);
   }
 
   loco::NodeShape visit(const luci::CircleSplit *node) final
@@ -1692,71 +2340,10 @@ public:
 
   loco::NodeShape visit(const luci::CircleStridedSlice *node) final
   {
-    auto begin_node = dynamic_cast<luci::CircleConst *>(node->begin());
-    auto end_node = dynamic_cast<luci::CircleConst *>(node->end());
-    auto strides_node = dynamic_cast<luci::CircleConst *>(node->strides());
-
-    if (begin_node == nullptr || end_node == nullptr || strides_node == nullptr)
-    {
-      return use_own(node);
-    }
-
-    loco::TensorShape shape = infer_output_shape(node);
-    return loco::NodeShape{shape};
+    return infer_strided_slice(node);
   }
 
-  loco::NodeShape visit(const luci::CircleSqueeze *node) final
-  {
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-
-    // TODO input shape may be unknown before runtime
-    std::vector<bool> do_squeeze(input_shape.rank(), false);
-    uint32_t num_squeezed = 0;
-
-    if (!node->squeeze_dims().empty())
-    {
-      // SqueezeDims not empty, squeeze only dims specified
-      for (int32_t raw_dim : node->squeeze_dims())
-      {
-        int32_t dim = raw_dim < 0 ? raw_dim + input_shape.rank() : raw_dim;
-
-        if (dim < 0 || static_cast<uint32_t>(dim) >= input_shape.rank() ||
-            input_shape.dim(dim).value() != 1)
-        {
-          INTERNAL_EXN("invalid dimention specified to Squeeze");
-        }
-
-        if (!do_squeeze[dim])
-          ++num_squeezed;
-        do_squeeze[dim] = true;
-      }
-    }
-    else
-    {
-      // SqueezeDims empty, squeeze any dims with size == 1
-      for (uint32_t dim = 0; dim < input_shape.rank(); ++dim)
-      {
-        if (input_shape.dim(dim) == 1)
-        {
-          do_squeeze[dim] = true;
-          ++num_squeezed;
-        }
-      }
-    }
-
-    loco::TensorShape output_shape;
-    output_shape.rank(input_shape.rank() - num_squeezed);
-
-    for (uint32_t in_dim = 0, out_dim = 0; in_dim < input_shape.rank(); ++in_dim)
-    {
-      if (!do_squeeze[in_dim])
-      {
-        output_shape.dim(out_dim++) = input_shape.dim(in_dim);
-      }
-    }
-
-    return loco::NodeShape{output_shape};
-  }
+  loco::NodeShape visit(const luci::CircleSqueeze *node) final { return infer_squeeze(node); }
 
   loco::NodeShape visit(const luci::CircleSub *node) final { return broadcast_xy(node); }
 
@@ -1768,33 +2355,7 @@ public:
 
   loco::NodeShape visit(const luci::CircleTanh *node) final { return use_x(node); }
 
-  loco::NodeShape visit(const luci::CircleTile *node) final
-  {
-    const loco::DataType S32 = loco::DataType::S32;
-
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-    auto multiples = loco::must_cast<luci::CircleConst *>(node->multiples());
-
-    // TODO support non-const case
-    // TODO support S64 type
-    LUCI_ASSERT(multiples->dtype() == S32, "Only support int32 multiples");
-    LUCI_ASSERT(multiples->rank() == 1, "multiples should be rank 1")
-
-    uint32_t n = multiples->dim(0).value();
-
-    LUCI_ASSERT(n == input_shape.rank(), "length of multiples should be the same with input rank");
-
-    loco::TensorShape output_shape;
-
-    output_shape.rank(input_shape.rank());
-    for (uint32_t ni = 0; ni < n; ++ni)
-    {
-      int32_t multiple = multiples->at<S32>(ni);
-      output_shape.dim(ni) = input_shape.dim(ni).value() * static_cast<uint32_t>(multiple);
-    }
-
-    return loco::NodeShape{output_shape};
-  }
+  loco::NodeShape visit(const luci::CircleTile *node) final { return infer_tile(node); }
 
   loco::NodeShape visit(const luci::CircleTopKV2 *node) final
   {
@@ -1803,93 +2364,16 @@ public:
     return loco::NodeShape{input_shape};
   }
 
-  loco::NodeShape visit(const luci::CircleTranspose *node) final
-  {
-    auto input_shape = loco::shape_get(node->a()).as<loco::TensorShape>();
-
-    auto perm_node = loco::must_cast<luci::CircleConst *>(node->perm());
-
-    loco::TensorShape output_shape;
-    output_shape.rank(input_shape.rank());
-
-    assert(perm_node->dtype() == loco::DataType::S32);
-    assert(input_shape.rank() == perm_node->template size<loco::DataType::S32>());
-
-    for (uint32_t out_axis = 0; out_axis < output_shape.rank(); out_axis++)
-    {
-      auto in_axis = perm_node->template at<loco::DataType::S32>(out_axis);
-      output_shape.dim(out_axis) = input_shape.dim(in_axis);
-    }
-
-    return output_shape;
-  }
-
-  loco::NodeShape visit(const luci::CircleUnique *node) final
-  {
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-
-    assert(input_shape.rank() == 1);
-
-    loco::TensorShape shape_output;
-    shape_output = own_shape(node);
-
-    return loco::NodeShape{shape_output};
-  }
+  loco::NodeShape visit(const luci::CircleTranspose *node) final { return infer_transpose(node); }
 
   loco::NodeShape visit(const luci::CircleTransposeConv *node) final
   {
-    // TransposeConv's output shape is written in its 'inputSizes' argument
-    auto input_sizes_const = loco::must_cast<luci::CircleConst *>(node->inputSizes());
-    // TODO support non-const type
-    LUCI_ASSERT(input_sizes_const->dtype() == loco::DataType::S32, "Only support S32 dtype")
-    LUCI_ASSERT(input_sizes_const->rank() == 1 && input_sizes_const->dim(0).value() == 4,
-                "Only support rank 1 with 4 entries")
-
-    loco::TensorShape shape;
-
-    shape.rank(4);
-    for (uint32_t axis = 0; axis < 4; ++axis)
-      shape.dim(axis) = input_sizes_const->at<loco::DataType::S32>(axis);
-
-    return loco::NodeShape{shape};
+    return infer_transpose_conv(node);
   }
 
-  loco::NodeShape visit(const luci::CircleUnpack *node) final
-  {
-    // CircleUnpack provides list(array) of Tensors which has one less dimension of the input
-    // We'll set shape of CircleUnpack to shape of actual outputs
-    // TODO fix this if any problem rises
-    auto value_shape = loco::shape_get(node->value()).as<loco::TensorShape>();
-
-    auto axis = node->axis();
-    auto num = node->num();
-    auto rank = static_cast<int32_t>(value_shape.rank());
-
-    if (rank == 0)
-    {
-      // Unknown shape
-      return use_own(node);
-    }
-
-    LUCI_ASSERT(-rank <= axis && axis < rank, "Axis is out of range");
-
-    if (axis < 0)
-      axis += rank;
-
-    LUCI_ASSERT(num == static_cast<int32_t>(value_shape.dim(axis).value()),
-                "num, axis maybe incorrect");
-
-    loco::TensorShape output_shape;
-    output_shape.rank(rank - 1);
+  loco::NodeShape visit(const luci::CircleUnpack *node) final { return infer_unpack(node); }
 
-    for (int32_t i = 0, o = 0; i < rank; ++i)
-    {
-      if (i != axis)
-        output_shape.dim(o++) = value_shape.dim(i);
-    }
-
-    return loco::NodeShape{output_shape};
-  }
+  loco::NodeShape visit(const luci::CircleUnique *node) final { return infer_unique(node); }
 
   loco::NodeShape visit(const luci::CircleWhere *node) final { return use_own(node); }
 
@@ -1911,57 +2395,10 @@ public:
   // Circle Only
   loco::NodeShape visit(const luci::CircleBCQFullyConnected *node) final
   {
-    loco::TensorShape out_shape;
-
-    auto input_shape = loco::shape_get(node->input()).as<loco::TensorShape>();
-    auto weights_clusters = loco::must_cast<luci::CircleConst *>(node->weights_clusters());
-
-    LUCI_ASSERT(input_shape.rank() == 2, "Input rank of BCQFullyConnected should be 2");
-
-    int32_t qbits_sum = 0;
-    for (uint32_t i = 0; i < weights_clusters->dim(0).value(); ++i)
-    {
-      qbits_sum += weights_clusters->at<loco::DataType::S32>(i * 2 + 1);
-    }
-
-    out_shape.rank(2);
-    out_shape.dim(0) = qbits_sum;
-    out_shape.dim(1) = input_shape.dim(1);
-
-    return loco::NodeShape{out_shape};
+    return infer_bcq_fully_connected(node);
   }
 
-  loco::NodeShape visit(const luci::CircleBCQGather *node) final
-  {
-    loco::TensorShape input_shape;
-    loco::TensorShape output_shape;
-
-    const auto input_binary_shape = loco::shape_get(node->input_binary()).as<loco::TensorShape>();
-    const auto indices_shape = loco::shape_get(node->indices()).as<loco::TensorShape>();
-    auto axis = node->axis();
-
-    auto input_clusters = loco::must_cast<luci::CircleConst *>(node->input_clusters());
-    auto qbits_sum = 0;
-    for (uint32_t i = 0; i < input_clusters->dim(0).value(); ++i)
-    {
-      qbits_sum += input_clusters->at<loco::DataType::S32>(i * 2 + 1);
-    }
-
-    input_shape.rank(2);
-    input_shape.dim(0) = qbits_sum;
-    input_shape.dim(1) = input_binary_shape.dim(1).value() * 32;
-
-    output_shape.rank(input_shape.rank() - 1 + indices_shape.rank());
-    int32_t outdim_index = 0;
-    for (int32_t i = 0; i < axis; ++i)
-      output_shape.dim(outdim_index++) = input_shape.dim(i);
-    for (uint32_t i = 0; i < indices_shape.rank(); ++i)
-      output_shape.dim(outdim_index++) = indices_shape.dim(i);
-    for (uint32_t i = axis + 1; i < input_shape.rank(); ++i)
-      output_shape.dim(outdim_index++) = input_shape.dim(i);
-
-    return loco::NodeShape{output_shape};
-  }
+  loco::NodeShape visit(const luci::CircleBCQGather *node) final { return infer_bcq_gather(node); }
 
   loco::NodeShape visit(const luci::CircleInstanceNorm *node) final
   {
@@ -1971,25 +2408,9 @@ public:
   }
 
   // Virtual
-  loco::NodeShape visit(const luci::CircleInput *node) final
-  {
-    loco::TensorShape shape;
+  loco::NodeShape visit(const luci::CircleInput *node) final { return infer_input(node); }
 
-    shape.rank(node->rank());
-    for (uint32_t axis = 0; axis < node->rank(); axis++)
-      shape.dim(axis) = node->dim(axis);
-
-    return loco::NodeShape{shape};
-  }
-
-  loco::NodeShape visit(const luci::CircleOutput *node) final
-  {
-    auto graph_outputs = node->graph()->outputs();
-    auto graph_output = graph_outputs->at(node->index());
-    auto output_shape = graph_output->shape();
-
-    return loco::NodeShape{*output_shape};
-  }
+  loco::NodeShape visit(const luci::CircleOutput *node) final { return infer_output(node); }
 
   loco::NodeShape visit(const luci::CircleOutputDummy *node) final { return use_own(node); }
 
@@ -1997,259 +2418,32 @@ public:
 
   loco::NodeShape visit(const luci::CircleCustomOut *node) final { return use_own(node); }
 
-  loco::NodeShape visit(const luci::CircleIfOut *node) final
-  {
-    /**
-     * @note  IF operator type and shape are that of the "then" and "else"
-     *        Graph Outputs.
-     */
-    auto circle_if = dynamic_cast<const luci::CircleIf *>(node->input());
-    if (circle_if == nullptr)
-    {
-      INTERNAL_EXN("CircleIf IR is not configured correctly");
-    }
-
-    auto index = node->index();
-    auto then_graph = circle_if->then_graph();
-    auto else_graph = circle_if->else_graph();
-    assert(then_graph != nullptr);
-    assert(else_graph != nullptr);
-
-    // shape and type are assumed to be same
-    // these are checked at post_import_graph() in Import
-    auto then_outputs = loco::output_nodes(then_graph);
-    auto else_outputs = loco::output_nodes(else_graph);
-    assert(then_outputs.size() == else_outputs.size());
-    assert(index < static_cast<int32_t>(then_outputs.size()));
-
-    auto then_out = loco::must_cast<luci::CircleOutput *>(then_outputs.at(index));
-    auto else_out = loco::must_cast<luci::CircleOutput *>(else_outputs.at(index));
-
-    auto then_graph_outputs = then_graph->outputs(); // loco::GraphOutput items
-    auto else_graph_outputs = else_graph->outputs();
-    assert(then_graph_outputs->size() == else_graph_outputs->size());
-
-    auto then_graph_output = then_graph_outputs->at(then_out->index());
-    auto else_graph_output = else_graph_outputs->at(else_out->index());
-    (void)else_graph_output; // make compiler happy for unused variable warnings
-    assert(*then_graph_output->shape() == *else_graph_output->shape());
-
-    return loco::NodeShape{*then_graph_output->shape()};
-  }
+  loco::NodeShape visit(const luci::CircleIfOut *node) final { return infer_if_out(node); }
 
   loco::NodeShape visit(const luci::CircleNonMaxSuppressionV4Out *node) final
   {
-    const loco::DataType S32 = loco::DataType::S32;
-
-    auto nmsv4 = dynamic_cast<const luci::CircleNonMaxSuppressionV4 *>(node->input());
-    if (nmsv4 == nullptr)
-      INTERNAL_EXN("CircleNonMaxSuppressionV4 IR is not configured correctly");
-
-    auto index = node->index();
-    if (index == 1)
-      return loco::TensorShape({0});
-
-    assert(index == 0);
-
-    auto unknown = loco::TensorShape{loco::Dimension()};
-    auto max_output_size = dynamic_cast<const luci::CircleConst *>(nmsv4->max_output_size());
-    if (max_output_size == nullptr)
-      return unknown; // we need CircleConst for max output size
-
-    LUCI_ASSERT(max_output_size->dtype() == S32, "Only support int32 for max_output_size");
-
-    if (max_output_size->size<S32>() < 1)
-      return unknown;
-
-    auto max_output_size_value = uint32_t(max_output_size->at<S32>(0));
-    return loco::TensorShape{max_output_size_value};
+    return infer_non_max_suppression_v4_out(node);
   }
 
-  loco::NodeShape visit(const luci::CircleSplitOut *node) final
+  loco::NodeShape visit(const luci::CircleNonMaxSuppressionV5Out *node) final
   {
-    const loco::DataType S32 = loco::DataType::S32;
-
-    auto split = dynamic_cast<const luci::CircleSplit *>(node->input());
-    if (split == nullptr)
-      INTERNAL_EXN("CircleSplit IR is not configured correctly");
-
-    loco::NodeShape unknown;
-
-    auto split_shape = loco::shape_get(split).as<loco::TensorShape>();
-
-    auto split_dim = dynamic_cast<const luci::CircleConst *>(split->split_dim());
-    if (split_dim == nullptr)
-      return unknown; // we need CircleConst for split_dim
-    LUCI_ASSERT(split_dim->dtype() == S32, "Only support int32 for split_dim");
-
-    assert(split_dim->size<S32>() == 1);
-    auto split_dim_axis = split_dim->at<S32>(0);
-    if (split_dim_axis < 0)
-      split_dim_axis += split_shape.rank();
-
-    auto split_dim_value = split_shape.dim(split_dim_axis).value();
-    assert(split_dim_value % split->num_split() == 0);
-    const int split_depth = split_dim_value / split->num_split();
-
-    loco::TensorShape output_shape = split_shape;
-
-    // All shapes are equally same
-    output_shape.dim(split_dim_axis) = loco::Dimension(split_depth);
-
-    return loco::NodeShape{output_shape};
+    return infer_non_max_suppression_v5_out(node);
   }
 
-  loco::NodeShape visit(const luci::CircleSplitVOut *node) final
-  {
-    const loco::DataType S32 = loco::DataType::S32;
-
-    auto split = dynamic_cast<const luci::CircleSplitV *>(node->input());
-    if (split == nullptr)
-      INTERNAL_EXN("CircleSplit IR is not configured correctly");
-
-    loco::NodeShape unknown;
-
-    auto split_shape = loco::shape_get(split).as<loco::TensorShape>();
-
-    auto size_splits = dynamic_cast<const luci::CircleConst *>(split->size_splits());
-    if (size_splits == nullptr)
-      return unknown; // we need CircleConst for size_splits
-    LUCI_ASSERT(size_splits->dtype() == S32, "Only support int32 for size_splits");
-
-    auto split_dim = dynamic_cast<const luci::CircleConst *>(split->split_dim());
-    if (split_dim == nullptr)
-      return unknown; // we need CircleConst for split_dim
-    LUCI_ASSERT(split_dim->dtype() == S32, "Only support int32 for split_dim");
-
-    // fetch axis
-    assert(split_dim->size<S32>() == 1);
-    auto split_dim_axis = split_dim->at<S32>(0);
-    if (split_dim_axis < 0)
-      split_dim_axis += split_shape.rank();
-
-    // interpret size_splits values
-    int32_t size_splits_count = static_cast<int32_t>(size_splits->size<S32>());
-    assert(size_splits_count == split->num_split());
-
-    int64_t minus_one_count = 0, size_splits_sum = 0;
-    for (int32_t idx = 0; idx < size_splits_count; ++idx)
-    {
-      auto size = size_splits->at<S32>(idx);
-      assert(size >= -1);
-      if (size == -1)
-        ++minus_one_count;
-      else
-        size_splits_sum += size;
-    }
-    if (minus_one_count > 1)
-      INTERNAL_EXN("CircleSplitV size_splits has more than two -1 values");
-
-    // calcuate this SplitVOut shape
-    auto input_size = split_shape.dim(split_dim_axis).value();
-    assert(size_splits_sum <= input_size);
-
-    auto index_this = node->index();
-    assert(0 <= index_this && index_this < split->num_split());
-    auto split_depth = size_splits->at<S32>(index_this);
-    if (split_depth == -1)
-      split_depth = input_size - size_splits_sum;
+  loco::NodeShape visit(const luci::CircleSplitOut *node) final { return infer_split_out(node); }
 
-    loco::TensorShape output_shape = split_shape;
-
-    output_shape.dim(split_dim_axis) = loco::Dimension(split_depth);
-
-    return loco::NodeShape{output_shape};
-  }
+  loco::NodeShape visit(const luci::CircleSplitVOut *node) final { return infer_split_v_out(node); }
 
   loco::NodeShape visit(const luci::CircleTopKV2Out *node) final
   {
-    const loco::DataType S32 = loco::DataType::S32;
-
-    auto topkv2 = dynamic_cast<const luci::CircleTopKV2 *>(node->input());
-    if (topkv2 == nullptr)
-      INTERNAL_EXN("CircleSplit IR is not configured correctly");
-
-    // shape of topkv2 is same as topkv2->input()
-    auto input_shape = loco::shape_get(topkv2).as<loco::TensorShape>();
-
-    auto node_k = loco::must_cast<const luci::CircleConst *>(topkv2->k());
-    LUCI_ASSERT(node_k->dtype() == S32, "Only support Int32");
-    assert(node_k->size<S32>() == 1);
-
-    loco::TensorShape output_shape;
-
-    output_shape.rank(input_shape.rank());
-    for (uint32_t idx = 0; idx < input_shape.rank() - 1; ++idx)
-    {
-      output_shape.dim(idx) = input_shape.dim(idx);
-    }
-    output_shape.dim(input_shape.rank() - 1) = node_k->at<S32>(0);
-
-    return loco::NodeShape{output_shape};
+    return infer_top_k_v2_out(node);
   }
 
-  loco::NodeShape visit(const luci::CircleUniqueOut *node) final
-  {
-    auto unique = dynamic_cast<const luci::CircleUnique *>(node->input());
-    if (unique == nullptr)
-    {
-      INTERNAL_EXN("CircleUnique IR is not configured correctly");
-    }
+  loco::NodeShape visit(const luci::CircleUniqueOut *node) final { return infer_unique_out(node); }
 
-    auto unique_shape = loco::shape_get(unique).as<loco::TensorShape>();
+  loco::NodeShape visit(const luci::CircleUnpackOut *node) final { return infer_unpack_out(node); }
 
-    return loco::NodeShape{unique_shape};
-  }
-
-  loco::NodeShape visit(const luci::CircleUnpackOut *node) final
-  {
-    auto unpack = dynamic_cast<const luci::CircleUnpack *>(node->input());
-    if (unpack == nullptr)
-    {
-      INTERNAL_EXN("CircleUnpack IR is not configured correctly");
-    }
-
-    auto unpack_shape = loco::shape_get(unpack).as<loco::TensorShape>();
-
-    return loco::NodeShape{unpack_shape};
-  }
-
-  loco::NodeShape visit(const luci::CircleWhileOut *node) final
-  {
-    /**
-     * @note  WHILE operator's shape is the same with the "cond"
-     *        Graph input.
-     */
-    auto circle_while = dynamic_cast<const luci::CircleWhile *>(node->input());
-    if (circle_while == nullptr)
-    {
-      INTERNAL_EXN("CircleWhile IR is not configured correctly");
-    }
-
-    auto index = node->index();
-    auto cond_graph = circle_while->cond_graph();
-    assert(cond_graph != nullptr);
-
-    // Assumption: the index of CircleWhileOut matches with the index of input nodes returned by
-    // loco::input_nodes
-    auto cond_inputs = loco::input_nodes(cond_graph);
-    auto cond_in = loco::must_cast<luci::CircleInput *>(cond_inputs.at(index));
-
-    auto cond_graph_inputs = cond_graph->inputs();
-    auto cond_graph_input = cond_graph_inputs->at(cond_in->index());
-
-    auto cond_graph_input_shape = *cond_graph_input->shape();
-    auto this_shape = own_shape(node);
-
-    if (!(this_shape == cond_graph_input_shape))
-    {
-      LOGGER(l);
-      WARN(l) << "Warning: CircleWhileOut '" << node->name() << "' shape mispatch " << this_shape
-              << " vs " << cond_graph_input_shape;
-    }
-
-    return loco::NodeShape{this_shape};
-  }
+  loco::NodeShape visit(const luci::CircleWhileOut *node) final { return infer_while_out(node); }
 };
 
 } // namespace
diff --git a/compiler/luci/service/src/CircleTypeInferenceRule.cpp b/compiler/luci/service/src/CircleTypeInferenceRule.cpp
index e7910bfc0..d28d8ac99 100644
--- a/compiler/luci/service/src/CircleTypeInferenceRule.cpp
+++ b/compiler/luci/service/src/CircleTypeInferenceRule.cpp
@@ -257,6 +257,11 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT
     return loco::dtype_get(node->boxes());
   }
 
+  loco::DataType visit(const luci::CircleNonMaxSuppressionV5 *node) final
+  {
+    return loco::dtype_get(node->boxes());
+  }
+
   loco::DataType visit(const luci::CircleNotEqual *) final { return loco::DataType::BOOL; }
 
   loco::DataType visit(const luci::CirclePack *node) final
@@ -273,6 +278,11 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT
 
   loco::DataType visit(const luci::CirclePad *node) final { return loco::dtype_get(node->input()); }
 
+  loco::DataType visit(const luci::CirclePadV2 *node) final
+  {
+    return loco::dtype_get(node->input());
+  }
+
   loco::DataType visit(const luci::CirclePow *node) final
   {
     // TODO make sure types cannot differ
@@ -589,6 +599,17 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT
     return loco::DataType::S32;
   }
 
+  loco::DataType visit(const luci::CircleNonMaxSuppressionV5Out *node) final
+  {
+    // Output indices 0 and 2 are S32; the only other valid index, 1, is FLOAT32.
+    if (node->index() == 0 || node->index() == 2)
+    {
+      return loco::DataType::S32;
+    }
+    assert(node->index() == 1);
+    return loco::DataType::FLOAT32;
+  }
+
   loco::DataType visit(const luci::CircleSplitOut *node) final
   {
     return loco::dtype_get(node->input());
diff --git a/compiler/luci/tests/test.lst b/compiler/luci/tests/test.lst
index 9fd42ed4e..12dd7ff5b 100644
--- a/compiler/luci/tests/test.lst
+++ b/compiler/luci/tests/test.lst
@@ -96,6 +96,10 @@ addread(MirrorPad_000)
 addread(Mul_000)
 addread(Mul_U8_000)
 addread(Neg_000)
+addread(NonMaxSuppressionV4_000)
+addread(NonMaxSuppressionV4_001)
+addread(NonMaxSuppressionV5_000)
+addread(NonMaxSuppressionV5_001)
 addread(NotEqual_000)
 addread(OneHot_000)
 addread(OneHot_001)
@@ -105,6 +109,7 @@ addread(Pack_000)
 addread(Pack_U8_000)
 addread(Pad_000)
 addread(Pad_U8_000)
+addread(PadV2_000)
 addread(Pow_000)
 addread(PRelu_000)
 addread(Range_000)
@@ -128,6 +133,7 @@ addread(Reshape_002)
 addread(Reshape_003)
 addread(Reshape_U8_000)
 addread(ResizeBilinear_000)
+addread(ResizeBilinear_U8_000)
 addread(ResizeNearestNeighbor_000)
 addread(ReverseSequence_000)
 addread(ReverseV2_000)
@@ -151,6 +157,7 @@ addread(SpaceToBatchND_001)
 addread(SpaceToBatchND_002)
 addread(SpaceToBatchND_003)
 addread(SpaceToDepth_000)
+addread(SpaceToDepth_U8_000)
 addread(SparseToDense_000)
 addread(Split_000)
 addread(SplitV_000)
@@ -166,12 +173,19 @@ addread(Sub_U8_000)
 addread(Sum_000)
 addread(Sum_001)
 addread(Tanh_000)
+addread(Tanh_U8_000)
 addread(Tile_000)
 addread(Tile_U8_000)
 addread(TopKV2_000)
 addread(TopKV2_001)
 addread(Transpose_000)
 addread(TransposeConv_000)
+addread(Unique_000)
+addread(Unique_001)
+addread(Unique_002)
+addread(Unique_003)
+addread(Unique_U8_000)
+addread(Unique_U8_001)
 addread(Unpack_000)
 addread(Unpack_001)
 addread(Unpack_002)
@@ -296,6 +310,10 @@ addwrite(MirrorPad_000)
 addwrite(Mul_000)
 addwrite(Mul_U8_000)
 addwrite(Neg_000)
+addwrite(NonMaxSuppressionV4_000)
+addwrite(NonMaxSuppressionV4_001)
+addwrite(NonMaxSuppressionV5_000)
+addwrite(NonMaxSuppressionV5_001)
 addwrite(NotEqual_000)
 addwrite(OneHot_000)
 addwrite(OneHot_001)
@@ -304,6 +322,7 @@ addwrite(OneHot_003)
 addwrite(Pack_000)
 addwrite(Pack_U8_000)
 addwrite(Pad_000)
+addwrite(PadV2_000)
 addwrite(Pow_000)
 addwrite(PRelu_000)
 addwrite(Range_000)
@@ -327,6 +346,7 @@ addwrite(Reshape_002)
 addwrite(Reshape_003)
 addwrite(Reshape_U8_000)
 addwrite(ResizeBilinear_000)
+addwrite(ResizeBilinear_U8_000)
 addwrite(ResizeNearestNeighbor_000)
 addwrite(ReverseSequence_000)
 addwrite(ReverseV2_000)
@@ -350,6 +370,7 @@ addwrite(SpaceToBatchND_001)
 addwrite(SpaceToBatchND_002)
 addwrite(SpaceToBatchND_003)
 addwrite(SpaceToDepth_000)
+addwrite(SpaceToDepth_U8_000)
 addwrite(SparseToDense_000)
 addwrite(Split_000)
 addwrite(SplitV_000)
@@ -365,12 +386,19 @@ addwrite(Sub_U8_000)
 addwrite(Sum_000)
 addwrite(Sum_001)
 addwrite(Tanh_000)
+addwrite(Tanh_U8_000)
 addwrite(Tile_000)
 addwrite(Tile_U8_000)
 addwrite(TopKV2_000)
 addwrite(TopKV2_001)
 addwrite(Transpose_000)
 addwrite(TransposeConv_000)
+addwrite(Unique_000)
+addwrite(Unique_001)
+addwrite(Unique_002)
+addwrite(Unique_003)
+addwrite(Unique_U8_000)
+addwrite(Unique_U8_001)
 addwrite(Unpack_000)
 addwrite(Unpack_001)
 addwrite(Unpack_002)
diff --git a/compiler/one-cmds/one-import-tf b/compiler/one-cmds/one-import-tf
index d59e1c529..58c686882 100644
--- a/compiler/one-cmds/one-import-tf
+++ b/compiler/one-cmds/one-import-tf
@@ -83,6 +83,10 @@ while [ "$#" -ne 0 ]; do
   esac
 done
 
+if [ -n "${INPUT_SHAPES}" ] && [ "${TF_INTERFACE}" = "--v2" ]; then  # quoted: an unquoted empty value makes [ -n ] always true
+  echo "Warning: if --v2 option is used, shape will be ignored"
+fi
+
 if [ -z ${INPUT_PATH} ] || [ ! -e ${INPUT_PATH} ]; then
   echo "Error: input model not found"
   echo ""
@@ -117,16 +121,18 @@ show_err_onexit()
 trap show_err_onexit ERR
 
 # generate temporary tflite file
-echo "python" "${DRIVER_PATH}/tf2tfliteV2.py" ${TF_INTERFACE} --input_path ${INPUT_PATH} \
---input_arrays ${INPUT_ARRAYS} --input_shapes ${INPUT_SHAPES} \
---output_path "${TMPDIR}/${MODEL_NAME}.tflite" \
---output_arrays ${OUTPUT_ARRAYS} > "${OUTPUT_PATH}.log"
-echo " " >> "${OUTPUT_PATH}.log"
+CONVERT_SCRIPT="python ${DRIVER_PATH}/tf2tfliteV2.py ${TF_INTERFACE} "
+CONVERT_SCRIPT+="--input_path ${INPUT_PATH} "
+CONVERT_SCRIPT+="--input_arrays ${INPUT_ARRAYS} "
+CONVERT_SCRIPT+="--output_path ${TMPDIR}/${MODEL_NAME}.tflite "
+CONVERT_SCRIPT+="--output_arrays ${OUTPUT_ARRAYS} "
+if [ -n "${INPUT_SHAPES}" ]; then  # append --input_shapes only when a value was given
+  CONVERT_SCRIPT+="--input_shapes ${INPUT_SHAPES} "
+fi
 
-python "${DRIVER_PATH}/tf2tfliteV2.py" ${TF_INTERFACE} --input_path ${INPUT_PATH} \
---input_arrays ${INPUT_ARRAYS} --input_shapes ${INPUT_SHAPES} \
---output_path "${TMPDIR}/${MODEL_NAME}.tflite" \
---output_arrays ${OUTPUT_ARRAYS} >> "${OUTPUT_PATH}.log" 2>&1
+echo ${CONVERT_SCRIPT} > "${OUTPUT_PATH}.log"
+echo "" >> "${OUTPUT_PATH}.log"
+$CONVERT_SCRIPT >> "${OUTPUT_PATH}.log" 2>&1
 
 # convert .tflite to .circle
 echo " " >> "${OUTPUT_PATH}.log"
diff --git a/compiler/one-cmds/one-prepare-venv b/compiler/one-cmds/one-prepare-venv
index 0a53bd3dd..0b11e7f0b 100644
--- a/compiler/one-cmds/one-prepare-venv
+++ b/compiler/one-cmds/one-prepare-venv
@@ -46,7 +46,9 @@ python3 -m venv "${DRIVER_PATH}/venv"
 # Install tensorflow
 source "${VENV_ACTIVATE}"
 
+# TODO Remove the pinned versions 'pip==20.2.1 setuptools==49.3.0'
+# NOTE Versions are pinned as a temporary hotfix for setuptools 50.x.y
 python -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhost.org \
-  install -U pip setuptools
+  install -U pip==20.2.1 setuptools==49.3.0
 python -m pip --default-timeout=1000 --trusted-host pypi.org --trusted-host files.pythonhost.org \
   install tensorflow-cpu==2.3.0
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/fake_quantization/ker.json
new file mode 100644
index 000000000..6460e54cf
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/fake_quantization/ker.json
@@ -0,0 +1,48 @@
+{
+  "weights": [
+    [
+      [
+        [
+          1.0,
+          2.0
+        ],
+        [
+          -3.0,
+          -4.0
+        ]
+      ],
+      [
+        [
+          -5.0,
+          6.0
+        ],
+        [
+          -7.0,
+          8.0
+        ]
+      ]
+    ],
+    [
+      [
+        [
+          4.0,
+          -2.0
+        ],
+        [
+          3.0,
+          -1.0
+        ]
+      ],
+      [
+        [
+          -8.0,
+          -6.0
+        ],
+        [
+          7.0,
+          5.0
+        ]
+      ]
+    ]
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/bias.json
new file mode 100644
index 000000000..a55af0be5
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/bias.json
@@ -0,0 +1,10 @@
+{
+  "weights": [
+    4374,
+    8747
+  ],
+  "scale": [
+    0.0002286423499283808,
+    0.0002286423499283808
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ifm.json
new file mode 100644
index 000000000..0e481bbfd
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ifm.json
@@ -0,0 +1,4 @@
+{
+  "scale": 0.0038869199343025684,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ker.json
new file mode 100644
index 000000000..4e12a5550
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ker.json
@@ -0,0 +1,64 @@
+{
+  "weights": [
+    [
+      [
+        [
+          136,
+          153
+        ],
+        [
+          68,
+          51
+        ]
+      ],
+      [
+        [
+          34,
+          221
+        ],
+        [
+          0,
+          255
+        ]
+      ]
+    ],
+    [
+      [
+        [
+          204,
+          102
+        ],
+        [
+          187,
+          119
+        ]
+      ],
+      [
+        [
+          0,
+          34
+        ],
+        [
+          255,
+          221
+        ]
+      ]
+    ]
+  ],
+  "scale": [
+    0.058823529411764705,
+    0.058823529411764705
+  ],
+  "zero_point": [
+    119.0,
+    136.0
+  ],
+  "min": [
+    -7.0,
+    -8.0
+  ],
+  "max": [
+    8.0,
+    7.0
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ofm.json
new file mode 100644
index 000000000..7d23cbad2
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+  "scale": 0.05829785391688347,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/record_minmax/ifm.json
new file mode 100644
index 000000000..af8dc16de
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/record_minmax/ifm.json
@@ -0,0 +1,4 @@
+{
+  "min": 0.022708916887640953,
+  "max": 0.9911645770072937
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/record_minmax/ofm.json
new file mode 100644
index 000000000..5f7bd9942
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004/channel/uint8/record_minmax/ofm.json
@@ -0,0 +1,4 @@
+{
+  "min": 0.0,
+  "max": 14.86595230102539
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/fake_quantization/ker.json
new file mode 100644
index 000000000..675eadcb6
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/fake_quantization/ker.json
@@ -0,0 +1,34 @@
+{
+  "weights": [
+    [
+      [
+        [
+          1.0352935791015625,
+          1.976470947265625,
+          2.9568634033203125,
+          3.95294189453125
+        ],
+        [
+          -8.972549438476562,
+          9.976470947265625,
+          -11.011764526367188,
+          11.9686279296875
+        ]
+      ],
+      [
+        [
+          5.0039215087890625,
+          6.023530960083008,
+          7.035295486450195,
+          8.01568603515625
+        ],
+        [
+          13.027450561523438,
+          -14.023529052734375,
+          14.988235473632812,
+          -16.0313720703125
+        ]
+      ]
+    ]
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/bias.json
new file mode 100644
index 000000000..3cda45238
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/bias.json
@@ -0,0 +1,14 @@
+{
+  "weights": [
+    2985,
+    5473,
+    7578,
+    9382
+  ],
+  "scale": [
+    0.0003349798455903035,
+    0.0003654325561959198,
+    0.00039588526680153606,
+    0.00042633797740715233
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ifm.json
new file mode 100644
index 000000000..97931cc58
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ifm.json
@@ -0,0 +1,4 @@
+{
+  "scale": 0.003882720833644271,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ker.json
new file mode 100644
index 000000000..add4d0f35
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ker.json
@@ -0,0 +1,58 @@
+{
+  "weights": [
+    [
+      [
+        [
+          116,
+          170,
+          137,
+          182
+        ],
+        [
+          0,
+          255,
+          0,
+          255
+        ]
+      ],
+      [
+        [
+          162,
+          213,
+          177,
+          219
+        ],
+        [
+          255,
+          0,
+          255,
+          0
+        ]
+      ]
+    ]
+  ],
+  "scale": [
+    0.08627450980392157,
+    0.09411764705882353,
+    0.10196078431372549,
+    0.10980392156862745
+  ],
+  "zero_point": [
+    104.0,
+    149.0,
+    108.0,
+    146.0
+  ],
+  "min": [
+    -8.972549019607843,
+    -14.023529411764706,
+    -11.011764705882353,
+    -16.031372549019608
+  ],
+  "max": [
+    13.027450980392157,
+    9.976470588235294,
+    14.988235294117647,
+    11.968627450980392
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ofm.json
new file mode 100644
index 000000000..f587aac24
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+  "scale": 0.07756166160106659,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/record_minmax/ifm.json
new file mode 100644
index 000000000..fa8fffc3e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/record_minmax/ifm.json
@@ -0,0 +1,4 @@
+{
+  "min": 0.003264044094830751,
+  "max": 0.9900938200950622
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/record_minmax/ofm.json
new file mode 100644
index 000000000..612c0b4ea
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002/channel/uint8/record_minmax/ofm.json
@@ -0,0 +1,4 @@
+{
+  "min": 0.0,
+  "max": 19.778222274780273
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/fake_quantization/weight.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/fake_quantization/weight.json
new file mode 100644
index 000000000..4661cb3ca
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/fake_quantization/weight.json
@@ -0,0 +1,76 @@
+{
+  "weights": [
+    [
+      1.0039215087890625,
+      2.007843017578125,
+      -3.0117650032043457,
+      -4.015686511993408,
+      -5.019608020782471,
+      6.023530006408691,
+      -7.027451515197754,
+      7.968626976013184,
+      4.015686988830566,
+      -2.007843017578125,
+      3.0117645263671875,
+      -1.0039215087890625,
+      -7.9686279296875,
+      -6.023530006408691,
+      7.027451515197754,
+      5.019608497619629
+    ],
+    [
+      1.0039215087890625,
+      2.007843017578125,
+      -3.0117650032043457,
+      -4.015686511993408,
+      -5.019608020782471,
+      6.023530006408691,
+      -7.027451515197754,
+      7.968626976013184,
+      4.015686988830566,
+      -2.007843017578125,
+      3.0117645263671875,
+      -1.0039215087890625,
+      -7.9686279296875,
+      -6.023530006408691,
+      7.027451515197754,
+      5.019608497619629
+    ],
+    [
+      1.0039215087890625,
+      2.007843017578125,
+      -3.0117650032043457,
+      -4.015686511993408,
+      -5.019608020782471,
+      6.023530006408691,
+      -7.027451515197754,
+      7.968626976013184,
+      4.015686988830566,
+      -2.007843017578125,
+      3.0117645263671875,
+      -1.0039215087890625,
+      -7.9686279296875,
+      -6.023530006408691,
+      7.027451515197754,
+      5.019608497619629
+    ],
+    [
+      1.0039215087890625,
+      2.007843017578125,
+      -3.0117650032043457,
+      -4.015686511993408,
+      -5.019608020782471,
+      6.023530006408691,
+      -7.027451515197754,
+      7.968626976013184,
+      4.015686988830566,
+      -2.007843017578125,
+      3.0117645263671875,
+      -1.0039215087890625,
+      -7.9686279296875,
+      -6.023530006408691,
+      7.027451515197754,
+      5.019608497619629
+    ]
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/bias.json
new file mode 100644
index 000000000..4333c0fed
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/bias.json
@@ -0,0 +1,14 @@
+{
+  "weights": [
+    4099,
+    -8199,
+    -12298,
+    16398
+  ],
+  "scale": [
+    0.00024393631821001058,
+    0.00024393631821001058,
+    0.00024393631821001058,
+    0.00024393631821001058
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/in.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/in.json
new file mode 100644
index 000000000..8edac1bd9
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/in.json
@@ -0,0 +1,4 @@
+{
+  "scale": 0.003887734841555357,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/out.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/out.json
new file mode 100644
index 000000000..1b94f1652
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/out.json
@@ -0,0 +1,4 @@
+{
+  "scale": 0.061938945204019547,
+  "zero_point": 171.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/weight.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/weight.json
new file mode 100644
index 000000000..5ee46c87f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/quantization/weight.json
@@ -0,0 +1,100 @@
+{
+  "weights": [
+    [
+      144,
+      160,
+      80,
+      64,
+      48,
+      224,
+      16,
+      255,
+      192,
+      96,
+      176,
+      112,
+      1,
+      32,
+      240,
+      208
+    ],
+    [
+      144,
+      160,
+      80,
+      64,
+      48,
+      224,
+      16,
+      255,
+      192,
+      96,
+      176,
+      112,
+      1,
+      32,
+      240,
+      208
+    ],
+    [
+      144,
+      160,
+      80,
+      64,
+      48,
+      224,
+      16,
+      255,
+      192,
+      96,
+      176,
+      112,
+      1,
+      32,
+      240,
+      208
+    ],
+    [
+      144,
+      160,
+      80,
+      64,
+      48,
+      224,
+      16,
+      255,
+      192,
+      96,
+      176,
+      112,
+      1,
+      32,
+      240,
+      208
+    ]
+  ],
+  "scale": [
+    0.06274509803921569,
+    0.06274509803921569,
+    0.06274509803921569,
+    0.06274509803921569
+  ],
+  "zero_point": [
+    128.0,
+    128.0,
+    128.0,
+    128.0
+  ],
+  "min": [
+    -8.031372549019608,
+    -8.031372549019608,
+    -8.031372549019608,
+    -8.031372549019608
+  ],
+  "max": [
+    7.968627450980392,
+    7.968627450980392,
+    7.968627450980392,
+    7.968627450980392
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/record_minmax/in.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/record_minmax/in.json
new file mode 100644
index 000000000..48e4645c9
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/record_minmax/in.json
@@ -0,0 +1,4 @@
+{
+  "min": 0.010438590832054616,
+  "max": 0.9913724160194397
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/record_minmax/out.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/record_minmax/out.json
new file mode 100644
index 000000000..ec83b94d1
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003/channel/uint8/record_minmax/out.json
@@ -0,0 +1,4 @@
+{
+  "min": -10.584291763305664,
+  "max": 5.210139312744141
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/fake_quantization/ker.json
new file mode 100644
index 000000000..76a0440a0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/fake_quantization/ker.json
@@ -0,0 +1,48 @@
+{
+  "weights": [
+    [
+      [
+        [
+          0.960784912109375,
+          2.0588245391845703
+        ],
+        [
+          -3.0196075439453125,
+          -3.980391502380371
+        ],
+        [
+          4.9411773681640625,
+          -6.039215087890625
+        ]
+      ],
+      [
+        [
+          7.0,
+          7.960784912109375
+        ],
+        [
+          -9.058823585510254,
+          -10.019607543945312
+        ],
+        [
+          10.980392456054688,
+          -11.941176414489746
+        ]
+      ],
+      [
+        [
+          13.039216995239258,
+          14.000001907348633
+        ],
+        [
+          -14.960784912109375,
+          -16.05882453918457
+        ],
+        [
+          17.019607543945312,
+          -17.980392456054688
+        ]
+      ]
+    ]
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ifm.json
new file mode 100644
index 000000000..4c3669f6b
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ifm.json
@@ -0,0 +1,4 @@
+{
+  "scale": 0.0038701011799275875,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ker.json
new file mode 100644
index 000000000..04e0648de
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ker.json
@@ -0,0 +1,60 @@
+{
+  "weights": [
+    [
+      [
+        [
+          138,
+          146
+        ],
+        [
+          109,
+          102
+        ],
+        [
+          167,
+          87
+        ]
+      ],
+      [
+        [
+          182,
+          189
+        ],
+        [
+          65,
+          58
+        ],
+        [
+          211,
+          44
+        ]
+      ],
+      [
+        [
+          226,
+          233
+        ],
+        [
+          22,
+          14
+        ],
+        [
+          255,
+          0
+        ]
+      ]
+    ]
+  ],
+  "scale": [
+    0.13725490196078433
+  ],
+  "zero_point": [
+    131.0
+  ],
+  "min": [
+    -17.980392156862745
+  ],
+  "max": [
+    17.019607843137255
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ofm.json
new file mode 100644
index 000000000..2e1790508
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/quantization/ofm.json
@@ -0,0 +1,4 @@
+{
+  "scale": 0.25486624240875244,
+  "zero_point": 178.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/record_minmax/ifm.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/record_minmax/ifm.json
new file mode 100644
index 000000000..d46844baf
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/record_minmax/ifm.json
@@ -0,0 +1,4 @@
+{
+  "min": 0.006121497452259064,
+  "max": 0.9868757891654968
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/record_minmax/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/record_minmax/ofm.json
new file mode 100644
index 000000000..4441f1876
--- /dev/null
+++ b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001/channel/uint8/record_minmax/ofm.json
@@ -0,0 +1,4 @@
+{
+  "min": -45.46586318969727,
+  "max": 19.525028419494628
+}
diff --git a/compiler/pota-quantization-value-test/test.lst b/compiler/pota-quantization-value-test/test.lst
index 9eb348922..d9fd91761 100644
--- a/compiler/pota-quantization-value-test/test.lst
+++ b/compiler/pota-quantization-value-test/test.lst
@@ -1,4 +1,8 @@
+addTest(Conv2D_004 channel uint8)
 addTest(Conv2D_004 layer uint8)
+addTest(DepthwiseConv2D_002 channel uint8)
 addTest(DepthwiseConv2D_002 layer uint8)
+addTest(FullyConnected_003 channel uint8)
 addTest(FullyConnected_003 layer uint8)
+addTest(TransposeConv_001 channel uint8)
 addTest(TransposeConv_001 layer uint8)
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/0.txt
new file mode 100644
index 000000000..98e895c04
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/0.txt
@@ -0,0 +1 @@
+0.19242816,0.44059092,0.06788187,0.04543579,0.14106855,0.6858487 ,0.6214997 ,0.31582046,0.859484  ,0.3664256 ,0.86936104,0.871024  ,0.68752515,0.5296719 ,0.99137205,0.02956272,0.14838405,0.69830126,0.22359788,0.9060323 ,0.7141239 ,0.5573066 ,0.96645916,0.11426282
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/1.txt
new file mode 100644
index 000000000..f480f8086
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/1.txt
@@ -0,0 +1 @@
+0.57016104,0.2788207 ,0.8045938 ,0.7589986 ,0.81506515,0.8411593 ,0.4162234 ,0.1664247 ,0.5584996 ,0.7799966 ,0.4213713 ,0.97587234,0.79440975,0.5089373 ,0.90030503,0.78015554,0.10080549,0.5115089 ,0.77238286,0.9580212 ,0.8758745 ,0.14367636,0.4304664 ,0.55175275
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/2.txt
new file mode 100644
index 000000000..683ea39b0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/2.txt
@@ -0,0 +1 @@
+0.6224246 ,0.30448085,0.29629433,0.44483584,0.30473125,0.6186932 ,0.45563242,0.5394331 ,0.22901213,0.4313142 ,0.4019574 ,0.02263176,0.3806077 ,0.27828163,0.23962335,0.26323524,0.6125012 ,0.5459546 ,0.6340052 ,0.19074932,0.2216875 ,0.77709603,0.03312786,0.02945002
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/3.txt
new file mode 100644
index 000000000..56c8c259e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/3.txt
@@ -0,0 +1 @@
+0.7524557 ,0.5408983 ,0.07039106,0.5143847 ,0.04857475,0.7305833 ,0.36986747,0.42291477,0.90452653,0.43744263,0.24857366,0.7537328 ,0.04559262,0.65276045,0.3851062 ,0.49503985,0.37213495,0.10627239,0.7085863 ,0.1913133 ,0.08057284,0.31767172,0.9685745 ,0.5942544 
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/4.txt
new file mode 100644
index 000000000..ecb221e8b
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004/channel/uint8/4.txt
@@ -0,0 +1 @@
+0.16251074,0.5574537 ,0.5857036 ,0.877607  ,0.29711136,0.02456062,0.8250261 ,0.21300122,0.5064036 ,0.5882086 ,0.7736793 ,0.09394809,0.98618525,0.6611699 ,0.5001983 ,0.06507304,0.88984424,0.57143325,0.07953393,0.02649987,0.9283147 ,0.65522593,0.18371649,0.12332761
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/0.txt
new file mode 100644
index 000000000..f4fb503ea
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/0.txt
@@ -0,0 +1 @@
+0.4383064 ,0.8700848 ,0.86010957,0.08396256,0.7963264 ,0.4156023 ,0.28146362,0.82196397,0.9921972 ,0.09969576,0.23987265,0.6734369 ,0.5469574 ,0.20805728,0.32639247,0.76773816
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/1.txt
new file mode 100644
index 000000000..af4b01576
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/1.txt
@@ -0,0 +1 @@
+0.4565062 ,0.92036587,0.47286046,0.18118097,0.5347498 ,0.91550153,0.300375  ,0.00581101,0.38686675,0.91085213,0.07278002,0.35556316,0.13014294,0.7274307 ,0.13867259,0.27517235
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/2.txt
new file mode 100644
index 000000000..57716034e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/2.txt
@@ -0,0 +1 @@
+0.6900174 ,0.28745306,0.30255774,0.5095008 ,0.6689176 ,0.4914624 ,0.92629427,0.504829  ,0.33514255,0.49005315,0.08569656,0.60965323,0.82193315,0.12380831,0.06971261,0.8822662 
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/3.txt
new file mode 100644
index 000000000..1e03d83b0
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/3.txt
@@ -0,0 +1 @@
+0.4240734 ,0.5430392 ,0.7536325 ,0.46065134,0.00315792,0.02719985,0.7080977 ,0.24389206,0.8114604 ,0.13292362,0.346597  ,0.70247084,0.55753845,0.01969242,0.82950485,0.66249627
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/4.txt
new file mode 100644
index 000000000..89ee30a6b
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002/channel/uint8/4.txt
@@ -0,0 +1 @@
+0.31586212,0.19079527,0.9161567 ,0.8614566 ,0.9018915 ,0.34651542,0.62554437,0.05542602,0.8268219 ,0.38112178,0.9396123 ,0.49426383,0.8034765 ,0.72456217,0.5404088 ,0.8512237 
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/0.txt
new file mode 100644
index 000000000..9b19de586
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/0.txt
@@ -0,0 +1 @@
+0.12934422,0.01033248,0.85648465,0.77248603,0.5128501 ,0.2453174 ,0.05065866,0.6601359 ,0.984665  ,0.57697976,0.58360994,0.79360527,0.90097004,0.26150337,0.1575109 ,0.9711614 
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/1.txt
new file mode 100644
index 000000000..45247791a
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/1.txt
@@ -0,0 +1 @@
+0.23895125,0.30275205,0.9916519 ,0.52355504,0.2577219 ,0.03600567,0.75446343,0.8064663 ,0.07550113,0.919774  ,0.84333146,0.48820078,0.31365713,0.97172034,0.7472666 ,0.66353893
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/2.txt
new file mode 100644
index 000000000..851e72c7d
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/2.txt
@@ -0,0 +1 @@
+0.6186688 ,0.4357826 ,0.63239735,0.64489084,0.17722449,0.7146202 ,0.5182415 ,0.45549247,0.21316396,0.9769707 ,0.18412311,0.05855984,0.6755795 ,0.8516815 ,0.20649713,0.32990783
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/3.txt
new file mode 100644
index 000000000..7ff3c7576
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/3.txt
@@ -0,0 +1 @@
+0.15501449,0.67026544,0.2957976 ,0.95577955,0.6215903 ,0.2029572 ,0.6069057 ,0.60434276,0.01298514,0.66787016,0.02053251,0.34120578,0.63562113,0.9166186 ,0.7134427 ,0.95491254
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/4.txt
new file mode 100644
index 000000000..fe60dbd26
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003/channel/uint8/4.txt
@@ -0,0 +1 @@
+0.46877268,0.36748132,0.09441566,0.4476946 ,0.08834982,0.5387882 ,0.8359256 ,0.4374628 ,0.3835091 ,0.3577151 ,0.49470654,0.6017202 ,0.3546875 ,0.64218026,0.69008195,0.37631917
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/0.txt
new file mode 100644
index 000000000..fb728bb70
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/0.txt
@@ -0,0 +1 @@
+0.5177879 ,0.10991199,0.19134527,0.25834408,0.16297385,0.5499753 ,0.8782323 ,0.74750453,0.16825114,0.72425395,0.68458   ,0.9399099 ,0.81214494,0.73325175,0.6407931 ,0.02865177,0.04341139,0.44781777,0.59848577,0.72099334,0.654926  ,0.93810713,0.5193446 ,0.8657371 ,0.50826824,0.10122011,0.6946167 ,0.5009533 ,0.27305812,0.7708204 ,0.14410722,0.7092205 
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/1.txt
new file mode 100644
index 000000000..8c72dc764
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/1.txt
@@ -0,0 +1 @@
+0.57410187,0.5534829 ,0.434663  ,0.55580896,0.9040647 ,0.16827786,0.82538676,0.25387943,0.7611494 ,0.49195638,0.00602222,0.20389748,0.541152  ,0.962896  ,0.37785006,0.9330408 ,0.9868882 ,0.57428783,0.830525  ,0.67987496,0.5576374 ,0.4303    ,0.8442439 ,0.21868347,0.45653513,0.7913927 ,0.31475154,0.6723579 ,0.5749264 ,0.07061622,0.6450232 ,0.52825755
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/2.txt
new file mode 100644
index 000000000..04ff6ae29
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/2.txt
@@ -0,0 +1 @@
+0.49751657,0.3004485 ,0.11624487,0.17704253,0.9022095 ,0.24667789,0.9204152 ,0.09801941,0.9194739 ,0.35418576,0.36659864,0.4962548 ,0.83799136,0.58057517,0.2948883 ,0.28411615,0.14429809,0.8460358 ,0.7026028 ,0.25956342,0.5251088 ,0.06569998,0.01754393,0.45209908,0.95638806,0.6044543 ,0.17229715,0.6828144 ,0.8684328 ,0.5829665 ,0.1456113 ,0.3334334 
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/3.txt
new file mode 100644
index 000000000..1342dac2f
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/3.txt
@@ -0,0 +1 @@
+0.00850414,0.5746211 ,0.7659193 ,0.8643168 ,0.36803156,0.08386383,0.76002747,0.19255683,0.05220222,0.18169314,0.88597506,0.6793377 ,0.45955214,0.16984127,0.5275391 ,0.910098  ,0.64607793,0.3997594 ,0.38601097,0.40899974,0.10289235,0.896202  ,0.22364503,0.30232555,0.11873382,0.07853477,0.20674925,0.35148785,0.02880615,0.09937044,0.4382221 ,0.53562754
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/4.txt
new file mode 100644
index 000000000..e3e85392e
--- /dev/null
+++ b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001/channel/uint8/4.txt
@@ -0,0 +1 @@
+0.8097857 ,0.4602844 ,0.01609277,0.7885611 ,0.9090256 ,0.75475484,0.98657864,0.5927874 ,0.73494065,0.374227  ,0.23557834,0.6020654 ,0.0122237 ,0.37126908,0.38277507,0.67635936,0.4139088 ,0.8625733 ,0.37775922,0.15304309,0.6196326 ,0.4827059 ,0.76868814,0.5530773 ,0.3336473 ,0.11217184,0.5877591 ,0.5325879 ,0.48493427,0.6317438 ,0.9385114 ,0.02825027
diff --git a/compiler/record-minmax/src/RecordMinMax.cpp b/compiler/record-minmax/src/RecordMinMax.cpp
index 17c6aa6ff..0ef7cccd1 100644
--- a/compiler/record-minmax/src/RecordMinMax.cpp
+++ b/compiler/record-minmax/src/RecordMinMax.cpp
@@ -16,12 +16,12 @@
 
 #include "RecordMinMax.h"
 #include "RecordFunction.h"
-#include "CircleExpContract.h"
 #include "MinMaxObserver.h"
 #include "HDF5Importer.h"
 
 #include <luci/Importer.h>
 #include <luci/CircleExporter.h>
+#include <luci/CircleFileExpContract.h>
 #include <luci/IR/CircleQuantParam.h>
 
 #include <algorithm>
@@ -83,6 +83,15 @@ void RecordMinMax::initialize(const std::string &input_model_path)
   }
   std::vector<char> model_data((std::istreambuf_iterator<char>(fs)),
                                std::istreambuf_iterator<char>());
+
+  // Verify flatbuffers
+  flatbuffers::Verifier verifier{reinterpret_cast<const uint8_t *>(model_data.data()),
+                                 model_data.size()};
+  if (!circle::VerifyModelBuffer(verifier))
+  {
+    throw std::runtime_error("ERROR: Failed to verify circle '" + input_model_path + "'");
+  }
+
   _module = luci::Importer().importModule(circle::GetModel(model_data.data()));
 
   if (_module == nullptr)
@@ -185,7 +194,8 @@ void RecordMinMax::saveModel(const std::string &output_model_path)
 {
   // Export to output Circle file
   luci::CircleExporter exporter;
-  CircleExpContract contract(_module.get(), output_model_path);
+
+  luci::CircleFileExpContract contract(_module.get(), output_model_path);
 
   if (!exporter.invoke(&contract))
   {
diff --git a/compiler/souschef/CMakeLists.txt b/compiler/souschef/CMakeLists.txt
index 5a307be16..ca7eddc6f 100644
--- a/compiler/souschef/CMakeLists.txt
+++ b/compiler/souschef/CMakeLists.txt
@@ -1,5 +1,13 @@
+nnas_find_package(Protobuf QUIET)  # souschef/Dataset.h includes <google/protobuf/repeated_field.h>
+
+if(NOT Protobuf_FOUND)
+  message(STATUS "Build souschef: FAILED (missing Protobuf)")  # report and skip this directory
+  return()
+endif(NOT Protobuf_FOUND)
+
 file(GLOB_RECURSE SOURCES "src/*.cpp")
 
 add_library(souschef STATIC ${SOURCES})
 set_target_properties(souschef PROPERTIES POSITION_INDEPENDENT_CODE ON)
 target_include_directories(souschef PUBLIC include)
+target_link_libraries(souschef PUBLIC libprotobuf)
diff --git a/compiler/souschef/include/souschef/Dataset.h b/compiler/souschef/include/souschef/Dataset.h
index 46a12e424..ef67a7316 100644
--- a/compiler/souschef/include/souschef/Dataset.h
+++ b/compiler/souschef/include/souschef/Dataset.h
@@ -19,6 +19,8 @@
 
 #include <vector>
 
+#include <google/protobuf/repeated_field.h>
+
 namespace souschef
 {
 
@@ -57,6 +59,22 @@ private:
   std::vector<T> _vec;
 };
 
+/**
+ * @brief Copy every element of a protobuf repeated field, in order, into a std::vector
+ */
+template <typename T> std::vector<T> as_vector(const ::google::protobuf::RepeatedPtrField<T> &field)
+{
+  return std::vector<T>(field.begin(), field.end());
+}
+
+/**
+ * @brief Build a Dataset from a copy of the elements of a protobuf repeated field
+ */
+template <typename T> Dataset<T> as_dataset(const ::google::protobuf::RepeatedPtrField<T> &field)
+{
+  return Dataset<T>(as_vector<T>(field));
+}
+
 } // namespace souschef
 
 #endif // __SOUSCHEF_DATASET_H__
diff --git a/compiler/souschef/include/souschef/Dims.h b/compiler/souschef/include/souschef/Dims.h
new file mode 100644
index 000000000..52c64dd47
--- /dev/null
+++ b/compiler/souschef/include/souschef/Dims.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SOUSCHEF_DIMS_H__
+#define __SOUSCHEF_DIMS_H__
+
+#include <cstdint>
+#include <functional>
+#include <numeric>
+#include <vector>
+
+namespace souschef
+{
+
+template <typename T> using Dims = std::vector<T>;
+
+/**
+ * @brief Convert a shape message into a list of int32 dimensions
+ *
+ * SHAPETYPE must provide dim() returning an iterable; each entry is cast to int32_t
+ */
+template <typename SHAPETYPE> Dims<int32_t> as_dims(const SHAPETYPE &shape)
+{
+  std::vector<int32_t> res;
+
+  for (auto &dim : shape.dim())
+  {
+    res.emplace_back(static_cast<int32_t>(dim));
+  }
+
+  return res;
+}
+
+/**
+ * @brief Return the product of all dimensions (1 when 'dims' is empty)
+ *
+ * @note Declared 'inline' because this non-template function is defined in a header
+ *       included by more than one source file; without it each one emits a definition
+ */
+inline int32_t element_count(const Dims<int32_t> &dims)
+{
+  return std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<int32_t>());
+}
+
+} // namespace souschef
+
+#endif // __SOUSCHEF_DIMS_H__
diff --git a/compiler/souschef/include/souschef/TensorFiller.h b/compiler/souschef/include/souschef/TensorFiller.h
new file mode 100644
index 000000000..1d87f1372
--- /dev/null
+++ b/compiler/souschef/include/souschef/TensorFiller.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SOUSCHEF_TENSOR_FILLER_H__
+#define __SOUSCHEF_TENSOR_FILLER_H__
+
+#include <cstdint>
+#include <map>
+#include <vector>
+
+namespace souschef
+{
+
+/**
+ * @brief Records, per tensor index, whether a tensor needs a filler and any explicit
+ *        int32/float filler values stored for it
+ */
+class TensorFiller
+{
+public:
+  virtual ~TensorFiller() = default;
+
+  /**
+   * @brief This will record the tensor by index, if it needs filler option,
+   *        such as kernel, bias.
+   */
+  void set_tensor_filler(uint32_t tensor_index) { _tensor_filler[tensor_index] = true; }
+
+  /**
+   * @brief This will store int32 filler values such as reshape information for the tensor
+   */
+  void set_tensor_filler(uint32_t tensor_index, const std::vector<int32_t> &expvalues)
+  {
+    _tensor_filler_vint32[tensor_index] = expvalues;
+  }
+
+  /**
+   * @brief This will store float filler values for the tensor
+   */
+  void set_tensor_filler(uint32_t tensor_index, const std::vector<float> &expvalues)
+  {
+    _tensor_filler_vfloat[tensor_index] = expvalues;
+  }
+
+  /**
+   * @brief This will return true if the tensor by index, needs a filler option.
+   */
+  bool get_tensor_filler(uint32_t tensor_index) const
+  {
+    auto it = _tensor_filler.find(tensor_index);
+    if (it != _tensor_filler.end())
+    {
+      return it->second;
+    }
+    return false;
+  }
+
+  /**
+   * @brief This will return true if the tensor by index, needs an int array filler option.
+   *        On success the stored values are copied into 'expvalues'; otherwise it is untouched.
+   */
+  bool get_tensor_filler(uint32_t tensor_index, std::vector<int32_t> &expvalues) const
+  {
+    auto it = _tensor_filler_vint32.find(tensor_index);
+    if (it != _tensor_filler_vint32.end())
+    {
+      expvalues = it->second;
+      return true;
+    }
+    return false;
+  }
+
+  /**
+   * @brief This will return true if the tensor by index, needs a float array filler option.
+   *        On success the stored values are copied into 'expvalues'; otherwise it is untouched.
+   */
+  bool get_tensor_filler(uint32_t tensor_index, std::vector<float> &expvalues) const
+  {
+    auto it = _tensor_filler_vfloat.find(tensor_index);
+    if (it != _tensor_filler_vfloat.end())
+    {
+      expvalues = it->second;
+      return true;
+    }
+    return false;
+  }
+
+private:
+  std::map<uint32_t, bool> _tensor_filler{};
+  std::map<uint32_t, std::vector<int32_t>> _tensor_filler_vint32{};
+  std::map<uint32_t, std::vector<float>> _tensor_filler_vfloat{};
+};
+
+} // namespace souschef
+
+#endif // __SOUSCHEF_TENSOR_FILLER_H__
diff --git a/compiler/circle-quantizer/src/CircleExpContract.cpp b/compiler/souschef/src/Dims.cpp
index b56b7eedc..fba4813fc 100644
--- a/compiler/circle-quantizer/src/CircleExpContract.cpp
+++ b/compiler/souschef/src/Dims.cpp
@@ -14,20 +14,6 @@
  * limitations under the License.
  */
 
-#include "CircleExpContract.h"
+#include "souschef/Dims.h"
 
-#include <oops/InternalExn.h>
-
-#include <fstream>
-#include <iostream>
-
-bool CircleExpContract::store(const char *ptr, const size_t size) const
-{
-  if (!ptr)
-    INTERNAL_EXN("Graph was not serialized by FlatBuffer for some reason");
-
-  std::ofstream fs(_filepath.c_str(), std::ofstream::binary);
-  fs.write(ptr, size);
-
-  return fs.good();
-}
+// NOTE Do NOT delete this file; this file checks the completeness of 'Dims.h'
diff --git a/compiler/tflchef/core/src/ModelChef.cpp b/compiler/tflchef/core/src/ModelChef.cpp
index 692ce48c1..a4b435dfa 100644
--- a/compiler/tflchef/core/src/ModelChef.cpp
+++ b/compiler/tflchef/core/src/ModelChef.cpp
@@ -26,6 +26,7 @@
 #include "OpChefs.h"
 
 #include <souschef/Dataset.h>
+#include <souschef/Dims.h>
 
 #include "Log.h"
 
@@ -41,52 +42,8 @@
 #include <sstream>
 #include <stdexcept>
 
-namespace
-{
-
 using namespace souschef;
 
-template <typename T> std::vector<T> as_vector(const ::google::protobuf::RepeatedPtrField<T> &field)
-{
-  std::vector<T> res;
-  for (const auto &elem : field)
-  {
-    res.emplace_back(elem);
-  }
-  return res;
-}
-
-template <typename T> Dataset<T> as_dataset(const ::google::protobuf::RepeatedPtrField<T> &field)
-{
-  return Dataset<T>(as_vector<T>(field));
-}
-
-} // namespace
-
-namespace
-{
-
-template <typename T> using Dims = std::vector<T>;
-
-Dims<int32_t> as_dims(const tflchef::TensorShape &shape)
-{
-  std::vector<int32_t> res;
-
-  for (auto &dim : shape.dim())
-  {
-    res.emplace_back(static_cast<int32_t>(dim));
-  }
-
-  return res;
-}
-
-int32_t element_count(const Dims<int32_t> &dims)
-{
-  return std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<int32_t>());
-}
-
-} // namespace
-
 namespace
 {
 
diff --git a/compiler/record-minmax/src/CircleExpContract.cpp b/compiler/tflchef/core/src/Op/NonMaxSuppressionV5.cpp
index b703250bd..500aa467f 100644
--- a/compiler/record-minmax/src/CircleExpContract.cpp
+++ b/compiler/tflchef/core/src/Op/NonMaxSuppressionV5.cpp
@@ -14,25 +14,17 @@
  * limitations under the License.
  */
 
-#include "CircleExpContract.h"
+#include "NonMaxSuppressionV5.h"
 
-#include <oops/InternalExn.h>
-
-#include <fstream>
-#include <iostream>
-
-namespace record_minmax
-{
-
-bool CircleExpContract::store(const char *ptr, const size_t size) const
+flatbuffers::Offset<void> NonMaxSuppressionV5Chef::value(flatbuffers::FlatBufferBuilder &fbb) const
 {
-  if (!ptr)
-    INTERNAL_EXN("Graph was not serialized by FlatBuffer for some reason");
+  tflite::NonMaxSuppressionV5OptionsBuilder options_builder{fbb};
 
-  std::ofstream fs(_filepath, std::ofstream::binary);
-  fs.write(ptr, size);
-
-  return fs.good();
+  return options_builder.Finish().Union();
 }
 
-} // namespace record_minmax
+std::unique_ptr<OpChef>
+NonMaxSuppressionV5ChefFactory::create(const tflchef::Operation *operation) const
+{
+  return std::unique_ptr<OpChef>{new NonMaxSuppressionV5Chef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/NonMaxSuppressionV5.h b/compiler/tflchef/core/src/Op/NonMaxSuppressionV5.h
new file mode 100644
index 000000000..a3c8b6009
--- /dev/null
+++ b/compiler/tflchef/core/src/Op/NonMaxSuppressionV5.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_NON_MAX_SUPPRESSION_V5_H__
+#define __OP_NON_MAX_SUPPRESSION_V5_H__
+
+#include "OpChef.h"
+
+/**
+ * @brief OpChef describing a NON_MAX_SUPPRESSION_V5 operator for a recipe operation
+ */
+class NonMaxSuppressionV5Chef final : public OpChef
+{
+public:
+  explicit NonMaxSuppressionV5Chef(const tflchef::Operation *operation) : _operation{operation}
+  {
+    // DO NOTHING
+  }
+
+public:
+  /** @brief Builtin operator code reported for this op */
+  tflite::BuiltinOperator code() const override
+  {
+    return tflite::BuiltinOperator_NON_MAX_SUPPRESSION_V5;
+  }
+
+  /** @brief Kind of builtin options this op reports */
+  tflite::BuiltinOptions type() const override
+  {
+    return tflite::BuiltinOptions_NonMaxSuppressionV5Options;
+  }
+
+  /** @brief Build the options object with 'fbb'; defined out of line */
+  flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+  // Recipe operation passed to the constructor; kept as a raw pointer and never deleted here
+  const tflchef::Operation *_operation;
+};
+
+/**
+ * @brief Factory for NonMaxSuppressionV5Chef
+ */
+struct NonMaxSuppressionV5ChefFactory final : public OpChefFactory
+{
+  std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_NON_MAX_SUPPRESSION_V5_H__
diff --git a/compiler/tflchef/core/src/OpChef.def b/compiler/tflchef/core/src/OpChef.def
index 244186265..6b242e811 100644
--- a/compiler/tflchef/core/src/OpChef.def
+++ b/compiler/tflchef/core/src/OpChef.def
@@ -56,6 +56,7 @@ OP_CHEF(MirrorPad, MirrorPadChefFactory)
 OP_CHEF(Mul, MulChefFactory)
 OP_CHEF(Neg, NegChefFactory)
 OP_CHEF(NonMaxSuppressionV4, NonMaxSuppressionV4ChefFactory)
+OP_CHEF(NonMaxSuppressionV5, NonMaxSuppressionV5ChefFactory)
 OP_CHEF(NotEqual, NotEqualChefFactory)
 OP_CHEF(OneHot, OneHotChefFactory)
 OP_CHEF(Pack, PackChefFactory)
diff --git a/compiler/tflchef/core/src/OpChefs.h b/compiler/tflchef/core/src/OpChefs.h
index 5b2e89bd9..7637b1c69 100644
--- a/compiler/tflchef/core/src/OpChefs.h
+++ b/compiler/tflchef/core/src/OpChefs.h
@@ -69,6 +69,7 @@
 #include "Op/Mul.h"
 #include "Op/Neg.h"
 #include "Op/NonMaxSuppressionV4.h"
+#include "Op/NonMaxSuppressionV5.h"
 #include "Op/NotEqual.h"
 #include "Op/OneHot.h"
 #include "Op/Pack.h"
diff --git a/compiler/tflchef/proto/tflchef.proto b/compiler/tflchef/proto/tflchef.proto
index 70b966ec3..9909d517a 100644
--- a/compiler/tflchef/proto/tflchef.proto
+++ b/compiler/tflchef/proto/tflchef.proto
@@ -371,6 +371,10 @@ message NonMaxSuppressionV4Options {
   // None
 }
 
+message NonMaxSuppressionV5Options {
+  // None
+}
+
 message NotEqualOptions {
   // None
 }
@@ -544,7 +548,7 @@ message Operation {
   // HardSwishOptions 196
   optional DepthToSpaceOptions depth_to_space_options = 197;
   optional NonMaxSuppressionV4Options non_max_suppression_v4_options = 198;
-  // NonMaxSuppressionV5Options 199
+  optional NonMaxSuppressionV5Options non_max_suppression_v5_options = 199;
   optional ScatterNdOptions scatter_nd_options = 200;
   optional NotEqualOptions notequal_options = 201;
   optional ExpandDimsOptions expand_dims_options = 202;
diff --git a/compiler/tflchef/tflite/CMakeLists.txt b/compiler/tflchef/tflite/CMakeLists.txt
index 645c16144..83127cb3e 100644
--- a/compiler/tflchef/tflite/CMakeLists.txt
+++ b/compiler/tflchef/tflite/CMakeLists.txt
@@ -7,3 +7,4 @@ target_link_libraries(tflchef_tflite tflchef_proto)
 target_link_libraries(tflchef_tflite mio_tflite)
 target_link_libraries(tflchef_tflite stdex)
 target_link_libraries(tflchef_tflite cwrap)
+target_link_libraries(tflchef_tflite souschef)
diff --git a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp
new file mode 100644
index 000000000..db7f4c932
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.cpp
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NonMaxSuppressionV5.h"
+
+#include "Convert.h"
+#include "FillerHelper.h"
+
+#include <cassert>
+
+namespace tflchef
+{
+
+/**
+ * @brief Register the constant inputs of NON_MAX_SUPPRESSION_V5 as tensors to fill
+ *
+ * Inputs 2..5 (max_output_size, iou_threshold, score_threshold, soft_nms_sigma) are
+ * type-checked by assert and then handed to fill_tensor_to_import().
+ */
+void TFliteOpNonMaxSuppressionV5::filler(const tflite::Operator *op, TFliteImport *import,
+                                         tflchef::ModelRecipe *model_recipe) const
+{
+  const auto &inputs = *op->inputs();
+
+  const tflite::Tensor *max_output_size_tensor = import->tensors()->Get(inputs[2]);
+  assert(max_output_size_tensor->type() == tflite::TensorType::TensorType_INT32);
+
+  const tflite::Tensor *iou_threshold_tensor = import->tensors()->Get(inputs[3]);
+  assert(iou_threshold_tensor->type() == tflite::TensorType::TensorType_FLOAT32);
+
+  const tflite::Tensor *score_threshold_tensor = import->tensors()->Get(inputs[4]);
+  assert(score_threshold_tensor->type() == tflite::TensorType::TensorType_FLOAT32);
+
+  const tflite::Tensor *soft_nms_sigma_tensor = import->tensors()->Get(inputs[5]);
+  assert(soft_nms_sigma_tensor->type() == tflite::TensorType::TensorType_FLOAT32);
+
+  // 'index' is an operand position; the helper is given no operator, so pass the tensor
+  // index that 'inputs' maps that position to, exactly as the lookups above do.
+  // NOTE(review): assumes fill_tensor_to_import() expects a tensor index - confirm in FillerHelper
+  for (int32_t index = 2; index < 6; ++index)
+  {
+    fill_tensor_to_import(inputs[index], import);
+  }
+}
+
+/**
+ * @brief Append an operation of type "NonMaxSuppressionV5" to the recipe and return it
+ */
+tflchef::Operation *TFliteOpNonMaxSuppressionV5::build(const tflite::Operator *op,
+                                                       TFliteImport *import,
+                                                       tflchef::ModelRecipe *model_recipe) const
+{
+  auto operation = model_recipe->add_operation();
+
+  operation->set_type("NonMaxSuppressionV5");
+
+  return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.h b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.h
new file mode 100644
index 000000000..c948043f4
--- /dev/null
+++ b/compiler/tflchef/tflite/src/Op/NonMaxSuppressionV5.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_NON_MAX_SUPPRESSION_V5_H__
+#define __TFLITE_OP_NON_MAX_SUPPRESSION_V5_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for NON_MAX_SUPPRESSION_V5: filler() passes inputs 2..5 to fill_tensor_to_import(), build() adds a "NonMaxSuppressionV5" operation to the recipe
+ */
+class TFliteOpNonMaxSuppressionV5 : public TFliteOpChef
+{
+public:
+  void filler(const tflite::Operator *op, TFliteImport *import,
+              tflchef::ModelRecipe *model_recipe) const override;
+  tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+                            tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_NON_MAX_SUPPRESSION_V5_H__
diff --git a/compiler/tflchef/tflite/src/TFliteImport.h b/compiler/tflchef/tflite/src/TFliteImport.h
index 5b46f4501..9d0a642ab 100644
--- a/compiler/tflchef/tflite/src/TFliteImport.h
+++ b/compiler/tflchef/tflite/src/TFliteImport.h
@@ -19,6 +19,8 @@
 
 #include <mio/tflite/schema_generated.h>
 
+#include <souschef/TensorFiller.h>
+
 #include <tflchef.pb.h>
 
 #include <map>
@@ -40,7 +42,7 @@ bool is_custom(const tflite::OperatorCode *opcode);
 /**
  * @brief Loads TF lite file and provides helpers to access attributes
  */
-class TFliteImport
+class TFliteImport : public souschef::TensorFiller
 {
 public:
   TFliteImport(const tflite::Model *model);
@@ -63,63 +65,6 @@ public:
   std::string opcode_name(const tflite::Operator *op) const;
   size_t buffer_info(const tflite::Tensor *tensor, const uint8_t **buff_data);
 
-  /**
-   * @brief This will record the tensor by index, if it needs filler option,
-   *        such as kernel, bias.
-   */
-  void set_tensor_filler(uint32_t tensor_index) { _tensor_filler[tensor_index] = true; }
-
-  /**
-   * @brief This will store int32 filler values such as reshape information for the tensor
-   */
-  void set_tensor_filler(uint32_t tensor_index, std::vector<int32_t> &expvalues)
-  {
-    _tensor_filler_vint32[tensor_index] = expvalues;
-  }
-
-  void set_tensor_filler(uint32_t tensor_index, std::vector<float> &expvalues)
-  {
-    _tensor_filler_vfloat[tensor_index] = expvalues;
-  }
-
-  /**
-   * @brief This will return true if the tensor by index, needs a filler option.
-   */
-  bool get_tensor_filler(uint32_t tensor_index)
-  {
-    auto it = _tensor_filler.find(tensor_index);
-    if (it != _tensor_filler.end())
-    {
-      return it->second;
-    }
-    return false;
-  }
-
-  /**
-   * @brief This will return true if the tensor by index, needs a int array filler option.
-   */
-  bool get_tensor_filler(uint32_t tensor_index, std::vector<int32_t> &expvalues)
-  {
-    auto it = _tensor_filler_vint32.find(tensor_index);
-    if (it != _tensor_filler_vint32.end())
-    {
-      expvalues = it->second;
-      return true;
-    }
-    return false;
-  }
-
-  bool get_tensor_filler(uint32_t tensor_index, std::vector<float> &expvalues)
-  {
-    auto it = _tensor_filler_vfloat.find(tensor_index);
-    if (it != _tensor_filler_vfloat.end())
-    {
-      expvalues = it->second;
-      return true;
-    }
-    return false;
-  }
-
 private:
   const TFliteSubGraphs_t *_subgraphs{nullptr};
   const TFliteBuffers_t *_buffers{nullptr};
@@ -129,10 +74,6 @@ private:
   std::vector<const tflite::OperatorCode *> _op_codes{};
   std::vector<int32_t> _inputs{};
   std::vector<int32_t> _outputs{};
-
-  std::map<uint32_t, bool> _tensor_filler{};
-  std::map<uint32_t, std::vector<int32_t>> _tensor_filler_vint32{};
-  std::map<uint32_t, std::vector<float>> _tensor_filler_vfloat{};
 };
 
 } // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/TFliteOpChefs.h b/compiler/tflchef/tflite/src/TFliteOpChefs.h
index de14e37d1..36a010957 100644
--- a/compiler/tflchef/tflite/src/TFliteOpChefs.h
+++ b/compiler/tflchef/tflite/src/TFliteOpChefs.h
@@ -69,6 +69,7 @@
 #include "Op/Mul.h"
 #include "Op/Neg.h"
 #include "Op/NonMaxSuppressionV4.h"
+#include "Op/NonMaxSuppressionV5.h"
 #include "Op/NotEqual.h"
 #include "Op/OneHot.h"
 #include "Op/Pack.h"
diff --git a/compiler/tflchef/tflite/src/TFliteOpRegistry.h b/compiler/tflchef/tflite/src/TFliteOpRegistry.h
index 8d33007be..a454e98b6 100644
--- a/compiler/tflchef/tflite/src/TFliteOpRegistry.h
+++ b/compiler/tflchef/tflite/src/TFliteOpRegistry.h
@@ -106,6 +106,7 @@ private:
     REG_TFL_OP(MUL, TFliteOpMul);
     REG_TFL_OP(NEG, TFliteOpNeg);
     REG_TFL_OP(NON_MAX_SUPPRESSION_V4, TFliteOpNonMaxSuppressionV4);
+    REG_TFL_OP(NON_MAX_SUPPRESSION_V5, TFliteOpNonMaxSuppressionV5);
     REG_TFL_OP(NOT_EQUAL, TFliteOpNotEqual);
     REG_TFL_OP(ONE_HOT, TFliteOpOneHot);
     REG_TFL_OP(PACK, TFliteOpPack);
diff --git a/compiler/tfldump/src/OpPrinter.cpp b/compiler/tfldump/src/OpPrinter.cpp
index df027c3e3..24b9264ff 100644
--- a/compiler/tfldump/src/OpPrinter.cpp
+++ b/compiler/tfldump/src/OpPrinter.cpp
@@ -677,9 +677,11 @@ OpPrinterRegistry::OpPrinterRegistry()
   _op_map[tflite::BuiltinOperator_MIRROR_PAD] = make_unique<MirrorPadPrinter>();
   _op_map[tflite::BuiltinOperator_MUL] = make_unique<MulPrinter>();
   // There is no Option for NON_MAX_SUPPRESSION_V4
+  // There is no Option for NON_MAX_SUPPRESSION_V5
   _op_map[tflite::BuiltinOperator_ONE_HOT] = make_unique<OneHotPrinter>();
   _op_map[tflite::BuiltinOperator_PACK] = make_unique<PackPrinter>();
   // There is no Option for PAD
+  // There is no Option for PADV2
   // There is no Option for PRELU
   // There is no Option for RELU
   // There is no Option for RELU6
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions.h
index 00b3de943..680118618 100644
--- a/compiler/tflite2circle/src/BuildBuiltinOptions.h
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions.h
@@ -63,6 +63,7 @@
 #include "BuildBuiltinOptions/MulOptions.h"
 #include "BuildBuiltinOptions/NegOptions.h"
 #include "BuildBuiltinOptions/NonMaxSuppressionV4Options.h"
+#include "BuildBuiltinOptions/NonMaxSuppressionV5Options.h"
 #include "BuildBuiltinOptions/NotEqualOptions.h"
 #include "BuildBuiltinOptions/OneHotOptions.h"
 #include "BuildBuiltinOptions/PackOptions.h"
diff --git a/compiler/luci-value-test/tester/src/CircleExpContract.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/NonMaxSuppressionV5Options.cpp
index b56b7eedc..637c544ff 100644
--- a/compiler/luci-value-test/tester/src/CircleExpContract.cpp
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/NonMaxSuppressionV5Options.cpp
@@ -14,20 +14,17 @@
  * limitations under the License.
  */
 
-#include "CircleExpContract.h"
+#include "NonMaxSuppressionV5Options.h"
 
-#include <oops/InternalExn.h>
-
-#include <fstream>
-#include <iostream>
-
-bool CircleExpContract::store(const char *ptr, const size_t size) const
+namespace tflite2circle
 {
-  if (!ptr)
-    INTERNAL_EXN("Graph was not serialized by FlatBuffer for some reason");
-
-  std::ofstream fs(_filepath.c_str(), std::ofstream::binary);
-  fs.write(ptr, size);
 
-  return fs.good();
+flatbuffers::Offset<circle::NonMaxSuppressionV5Options>
+build_circle_NonMaxSuppressionV5Options(flatbuffers::FlatBufferBuilder &fb,
+                                        const tflite::Operator *)
+{
+  circle::NonMaxSuppressionV5OptionsBuilder builtin_options_builder{fb};
+  return builtin_options_builder.Finish();
 }
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/NonMaxSuppressionV5Options.h b/compiler/tflite2circle/src/BuildBuiltinOptions/NonMaxSuppressionV5Options.h
new file mode 100644
index 000000000..faf989acc
--- /dev/null
+++ b/compiler/tflite2circle/src/BuildBuiltinOptions/NonMaxSuppressionV5Options.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_NON_MAX_SUPPRESSION_V5_OPTIONS_H__
+#define __BBO_NON_MAX_SUPPRESSION_V5_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::NonMaxSuppressionV5Options>
+build_circle_NonMaxSuppressionV5Options(flatbuffers::FlatBufferBuilder &fb,
+                                        const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_NON_MAX_SUPPRESSION_V5_OPTIONS_H__
diff --git a/compiler/tflite2circle/src/CircleModel.cpp b/compiler/tflite2circle/src/CircleModel.cpp
index cb4437a49..14c44cb36 100644
--- a/compiler/tflite2circle/src/CircleModel.cpp
+++ b/compiler/tflite2circle/src/CircleModel.cpp
@@ -119,6 +119,115 @@ Offset<SubGraphLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_fla
       // is_variable
       bool is_variable = it->is_variable();
 
+      flatbuffers::Offset<circle::SparsityParameters> sparsity;
+      // sparsity
+      if (it->sparsity())
+      {
+        flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order;
+        flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map;
+        flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::DimensionMetadata>>>
+            dim_metadata;
+
+        // traversal_order
+        if (it->sparsity()->traversal_order())
+        {
+          auto traversal_order_vec = std::vector<int32_t>{
+              it->sparsity()->traversal_order()->begin(), it->sparsity()->traversal_order()->end()};
+          traversal_order = fb->CreateVector(traversal_order_vec);
+        }
+
+        // block_map
+        if (it->sparsity()->block_map())
+        {
+          auto block_map_vec = std::vector<int32_t>{it->sparsity()->block_map()->begin(),
+                                                    it->sparsity()->block_map()->end()};
+          block_map = fb->CreateVector(block_map_vec);
+        }
+
+        // dim_metadata
+        // NOTE the loop is skipped when dim_metadata is absent instead of dereferencing null
+        if (auto tflite_dim_metadata = it->sparsity()->dim_metadata())
+        {
+          std::vector<flatbuffers::Offset<circle::DimensionMetadata>> dim_metadata_vec;
+          for (auto dm : *tflite_dim_metadata)
+          {
+            // array_segments
+            auto tflite_array_segments_type = dm->array_segments_type();
+            auto circle_array_segments =
+                get_circle_sparse_index_vector(*fb, dm, tflite_array_segments_type);
+            auto circle_array_segments_type =
+                get_circle_sparse_index_vector_type(tflite_array_segments_type);
+
+            // array_indices
+            // NOTE get_circle_sparse_index_vector() reads array_segments only, so the
+            //      indices are copied from the array_indices union right here
+            auto tflite_array_indices_type = dm->array_indices_type();
+            flatbuffers::Offset<void> circle_array_indices;
+            switch (tflite_array_indices_type)
+            {
+              case tflite::SparseIndexVector_NONE:
+                break;
+              case tflite::SparseIndexVector_Int32Vector:
+              {
+                auto tflite_values = dm->array_indices_as_Int32Vector()->values();
+                std::vector<int32_t> values_vec(tflite_values->begin(), tflite_values->end());
+                auto circle_values = fb->CreateVector(values_vec);
+                circle::Int32VectorBuilder indices_builder{*fb};
+                indices_builder.add_values(circle_values);
+                circle_array_indices = indices_builder.Finish().Union();
+                break;
+              }
+              case tflite::SparseIndexVector_Uint16Vector:
+              {
+                auto tflite_values = dm->array_indices_as_Uint16Vector()->values();
+                std::vector<uint16_t> values_vec(tflite_values->begin(), tflite_values->end());
+                auto circle_values = fb->CreateVector(values_vec);
+                circle::Uint16VectorBuilder indices_builder{*fb};
+                indices_builder.add_values(circle_values);
+                circle_array_indices = indices_builder.Finish().Union();
+                break;
+              }
+              case tflite::SparseIndexVector_Uint8Vector:
+              {
+                auto tflite_values = dm->array_indices_as_Uint8Vector()->values();
+                std::vector<uint8_t> values_vec(tflite_values->begin(), tflite_values->end());
+                auto circle_values = fb->CreateVector(values_vec);
+                circle::Uint8VectorBuilder indices_builder{*fb};
+                indices_builder.add_values(circle_values);
+                circle_array_indices = indices_builder.Finish().Union();
+                break;
+              }
+              default:
+                throw std::runtime_error("tflite2circle: wrong SparseIndexVector type.");
+            }
+            auto circle_array_indices_type =
+                get_circle_sparse_index_vector_type(tflite_array_indices_type);
+
+            auto circle_dim_metadata_builder = circle::DimensionMetadataBuilder{*fb};
+
+            circle_dim_metadata_builder.add_format(get_circle_dimension_type(dm->format()));
+            circle_dim_metadata_builder.add_dense_size(dm->dense_size());
+            circle_dim_metadata_builder.add_array_segments(circle_array_segments);
+            circle_dim_metadata_builder.add_array_segments_type(circle_array_segments_type);
+            circle_dim_metadata_builder.add_array_indices(circle_array_indices);
+            circle_dim_metadata_builder.add_array_indices_type(circle_array_indices_type);
+            dim_metadata_vec.emplace_back(circle_dim_metadata_builder.Finish());
+          }
+          dim_metadata = fb->CreateVector(dim_metadata_vec);
+        }
+
+        sparsity = circle::CreateSparsityParameters(*fb, traversal_order, block_map, dim_metadata);
+      }
+
+      // shape signature
+      flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape_signature;
+      if (it->shape_signature())
+      {
+        auto shape_signature_vec =
+            std::vector<int32_t>({it->shape_signature()->begin(), it->shape_signature()->end()});
+        shape_signature = fb->CreateVector(shape_signature_vec);
+      }
+
       circle::TensorBuilder tensor_builder{*fb};
       tensor_builder.add_shape(shape);
       tensor_builder.add_type(get_circle_tensortype(it->type()));
@@ -126,6 +235,8 @@ Offset<SubGraphLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_fla
       tensor_builder.add_name(name);
       tensor_builder.add_quantization(quantization);
       tensor_builder.add_is_variable(is_variable);
+      tensor_builder.add_sparsity(sparsity);
+      tensor_builder.add_shape_signature(shape_signature);
       auto tensor = tensor_builder.Finish();
       tensor_vec.emplace_back(tensor);
     }
@@ -226,6 +337,14 @@ CircleModel::CircleModel(FlatBufBuilder &fb, TFLModel &model)
     : _version{0}, _description{fb->CreateString("nnpackage")}, _fb{fb}
 {
   const tflite::Model *tfl_model = model.load_model();
+  // verify flatbuffers
+  flatbuffers::Verifier verifier{reinterpret_cast<const uint8_t *>(model._data.data()),
+                                 model._data.size()};
+  if (!tflite::VerifyModelBuffer(verifier))
+  {
+    throw std::runtime_error("ERROR: Failed to verify tflite");
+  }
+
   _operator_codes_offset =
       std::make_unique<Offset<OperatorCodeLink>>(fb, tfl_model->operator_codes());
   _subGraphs_offset = std::make_unique<Offset<SubGraphLink>>(fb, tfl_model->subgraphs());
diff --git a/compiler/tflite2circle/src/DataLookup.cpp b/compiler/tflite2circle/src/DataLookup.cpp
index b0d35d1a5..75504b062 100644
--- a/compiler/tflite2circle/src/DataLookup.cpp
+++ b/compiler/tflite2circle/src/DataLookup.cpp
@@ -123,4 +123,79 @@ circle::MirrorPadMode get_circle_mirrorpad_mode(tflite::MirrorPadMode tfl_mode)
   }
 }
 
+circle::DimensionType get_circle_dimension_type(tflite::DimensionType tfl_dim_type)
+{
+  switch (tfl_dim_type)
+  {
+    case tflite::DimensionType_DENSE:
+      return circle::DimensionType_DENSE;
+    case tflite::DimensionType_SPARSE_CSR:
+      return circle::DimensionType_SPARSE_CSR;
+    default:
+      throw std::runtime_error("tflite2circle: wrong dimension type.");
+  }
+}
+
+flatbuffers::Offset<void>
+get_circle_sparse_index_vector(flatbuffers::FlatBufferBuilder &fb,
+                               const tflite::DimensionMetadata *dm,
+                               const tflite::SparseIndexVector &tfl_sparse_index_vector_type)
+{
+  switch (tfl_sparse_index_vector_type)
+  {
+    case tflite::SparseIndexVector_NONE:
+      return flatbuffers::Offset<void>();
+    case tflite::SparseIndexVector_Int32Vector:
+    {
+      auto values_vec_int32 =
+          std::vector<int32_t>{dm->array_segments_as_Int32Vector()->values()->begin(),
+                               dm->array_segments_as_Int32Vector()->values()->end()};
+      auto values_int32 = fb.CreateVector(values_vec_int32);
+      circle::Int32VectorBuilder int32_vector_builder{fb};
+      int32_vector_builder.add_values(values_int32);
+      return int32_vector_builder.Finish().Union();
+    }
+    case tflite::SparseIndexVector_Uint16Vector:
+    {
+      auto values_vec_uint16 =
+          std::vector<uint16_t>{dm->array_segments_as_Uint16Vector()->values()->begin(),
+                                dm->array_segments_as_Uint16Vector()->values()->end()};
+      auto values_uint16 = fb.CreateVector(values_vec_uint16);
+      circle::Uint16VectorBuilder uint16_vector_builder{fb};
+      uint16_vector_builder.add_values(values_uint16);
+      return uint16_vector_builder.Finish().Union();
+    }
+    case tflite::SparseIndexVector_Uint8Vector:
+    {
+      auto values_vec_uint8 =
+          std::vector<uint8_t>{dm->array_segments_as_Uint8Vector()->values()->begin(),
+                               dm->array_segments_as_Uint8Vector()->values()->end()};
+      auto values_uint8 = fb.CreateVector(values_vec_uint8);
+      circle::Uint8VectorBuilder uint8_vector_builder{fb};
+      uint8_vector_builder.add_values(values_uint8);
+      return uint8_vector_builder.Finish().Union();
+    }
+    default:
+      throw std::runtime_error("tflite2circle: wrong SparseIndexVector type.");
+  }
+}
+
+circle::SparseIndexVector
+get_circle_sparse_index_vector_type(const tflite::SparseIndexVector &tfl_sparse_index_vector_type)
+{
+  switch (tfl_sparse_index_vector_type)
+  {
+    case tflite::SparseIndexVector_NONE:
+      return circle::SparseIndexVector_NONE;
+    case tflite::SparseIndexVector_Int32Vector:
+      return circle::SparseIndexVector_Int32Vector;
+    case tflite::SparseIndexVector_Uint16Vector:
+      return circle::SparseIndexVector_Uint16Vector;
+    case tflite::SparseIndexVector_Uint8Vector:
+      return circle::SparseIndexVector_Uint8Vector;
+    default:
+      throw std::runtime_error("tflite2circle: wrong SparseIndexVector type.");
+  }
+}
+
 } // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/DataLookup.h b/compiler/tflite2circle/src/DataLookup.h
index 7ea01b9c8..26ad74666 100644
--- a/compiler/tflite2circle/src/DataLookup.h
+++ b/compiler/tflite2circle/src/DataLookup.h
@@ -76,6 +76,25 @@ circle::BuiltinOptions get_circle_builtin_options_type(const tflite::Operator *o
 */
 circle::MirrorPadMode get_circle_mirrorpad_mode(tflite::MirrorPadMode tfl_mode);
 
+/**
+ * @brief Returns circle DimensionType according to tflite.
+*/
+circle::DimensionType get_circle_dimension_type(tflite::DimensionType tfl_dim_type);
+
+/**
+ * @brief Returns circle SparseIndexVector built from the array_segments union of tflite 'dm'.
+*/
+flatbuffers::Offset<void>
+get_circle_sparse_index_vector(flatbuffers::FlatBufferBuilder &fb,
+                               const tflite::DimensionMetadata *dm,
+                               const tflite::SparseIndexVector &tfl_sparse_index_vector_type);
+
+/**
+ * @brief Returns circle SparseIndexVector type according to tflite.
+*/
+circle::SparseIndexVector
+get_circle_sparse_index_vector_type(const tflite::SparseIndexVector &tfl_sparse_index_vector_type);
+
 } // namespace tflite2circle
 
 #endif // __DATA_LOOKUP_H__
diff --git a/compiler/tflite2circle/src/TFLBuiltinOptions.lst b/compiler/tflite2circle/src/TFLBuiltinOptions.lst
index a2a14538e..22b59863b 100644
--- a/compiler/tflite2circle/src/TFLBuiltinOptions.lst
+++ b/compiler/tflite2circle/src/TFLBuiltinOptions.lst
@@ -101,7 +101,7 @@ TFL_BUILTIN_OPTIONS(IfOptions)
 TFL_BUILTIN_OPTIONS(WhileOptions)
 TFL_BUILTIN_OPTIONS(DepthToSpaceOptions)
 TFL_BUILTIN_OPTIONS(NonMaxSuppressionV4Options)
-//TFL_BUILTIN_OPTIONS(NonMaxSuppressionV5Options)
+TFL_BUILTIN_OPTIONS(NonMaxSuppressionV5Options)
 TFL_BUILTIN_OPTIONS(RankOptions)
 TFL_BUILTIN_OPTIONS(ScatterNdOptions)
 TFL_BUILTIN_OPTIONS(SegmentSumOptions)
diff --git a/compiler/vconone/CMakeLists.txt b/compiler/vconone/CMakeLists.txt
index b8cb79331..be4398996 100644
--- a/compiler/vconone/CMakeLists.txt
+++ b/compiler/vconone/CMakeLists.txt
@@ -1,5 +1,5 @@
 if (NOT VCONONE_VERSION)
-  set(VCONONE_VERSION 0x0000000000080001)
+  set(VCONONE_VERSION 0x0000000000090001)
   # NOTE order is [build patch minor major]
   # if VCONONE_VERSION is set with -D option, it will be cached
   # you may have to remove cache file if you remove -D option
author	Chunseok Lee <chunseok.lee@samsung.com>	2020-09-05 21:49:46 +0900
committer	Chunseok Lee <chunseok.lee@samsung.com>	2020-09-05 21:49:46 +0900
commit	74476a2d0296bdad70a2f7f90bc7419a8b05bffd (patch)
tree	3f991636c1e9423d38eb16a384c20b569b0d678e /compiler
parent	042b262b3633b6c0f577aed6cb4b980ad0c1dcf3 (diff)
download	nnfw-74476a2d0296bdad70a2f7f90bc7419a8b05bffd.tar.gz nnfw-74476a2d0296bdad70a2f7f90bc7419a8b05bffd.tar.bz2 nnfw-74476a2d0296bdad70a2f7f90bc7419a8b05bffd.zip