Diffstat (limited to 'tests/tools')
65 files changed, 3146 insertions, 2807 deletions
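In short, this change set removes `nnapi_test`, renames `nnpackage_run` to `onert_run` (adding single model file support, raw input/output files, and on-the-fly quantization), and introduces a new `onert_train` tool. A minimal sketch of invoking the renamed runner, based on the README and option table in this patch (file and directory names here are hypothetical):

```
$ ./onert_run path_to_nnpackage_directory             # run an nnpackage with random inputs
$ ./onert_run --modelfile model.circle                # run a single model file directly
$ ./onert_run model.circle --quantize int8            # quantize first; exports model_quantized_q8.circle by default
$ ./onert_run nnpkg_dir --load:raw in --dump:raw out  # reads in.0, in.1, ... and writes out.0, out.1, ...
```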
diff --git a/tests/tools/nnapi_test/CMakeLists.txt b/tests/tools/nnapi_test/CMakeLists.txt deleted file mode 100644 index eac649b15..000000000 --- a/tests/tools/nnapi_test/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -if(NOT BUILD_NNAPI_TEST) - return() -endif(NOT BUILD_NNAPI_TEST) - -list(APPEND SOURCES "src/nnapi_test.cc") -list(APPEND SOURCES "src/args.cc") - -nnfw_find_package(Boost REQUIRED program_options) - -add_executable(nnapi_test ${SOURCES}) -target_include_directories(nnapi_test PRIVATE ${Boost_INCLUDE_DIRS}) -target_link_libraries(nnapi_test nnfw_lib_tflite) -target_link_libraries(nnapi_test ${Boost_PROGRAM_OPTIONS_LIBRARY}) -install(TARGETS nnapi_test DESTINATION bin) diff --git a/tests/tools/nnapi_test/src/args.cc b/tests/tools/nnapi_test/src/args.cc deleted file mode 100644 index 420e092c0..000000000 --- a/tests/tools/nnapi_test/src/args.cc +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "args.h" - -#include <iostream> - -namespace nnapi_test -{ - -Args::Args(const int argc, char **argv) -{ - Initialize(); - try - { - Parse(argc, argv); - } - catch (const std::exception &e) - { - std::cerr << "The argments that cannot be parsed: " << e.what() << '\n'; - print(argv); - exit(255); - } -} - -void Args::print(char **argv) -{ - std::cout << "nnapi_test\n\n"; - std::cout << "Usage: " << argv[0] << " <.tflite> [<options>]\n\n"; - std::cout << _options; - std::cout << "\n"; -} - -void Args::Initialize(void) -{ - // General options - po::options_description general("General options", 100); - - // clang-format off - general.add_options() - ("help,h", "Print available options") - ("tflite", po::value<std::string>()->required()) - ("seed", po::value<int>()->default_value(0), "The seed of random inputs") - ("num_runs", po::value<int>()->default_value(2), "The number of runs") - ; - // clang-format on - - _options.add(general); - _positional.add("tflite", 1); - _positional.add("seed", 2); -} - -void Args::Parse(const int argc, char **argv) -{ - po::variables_map vm; - po::store(po::command_line_parser(argc, argv).options(_options).positional(_positional).run(), - vm); - - if (vm.count("help")) - { - print(argv); - - exit(0); - } - - po::notify(vm); - if (vm.count("tflite")) - { - _tflite_filename = vm["tflite"].as<std::string>(); - - if (_tflite_filename.empty()) - { - std::cerr << "Please specify tflite file.\n"; - print(argv); - exit(255); - } - else - { - if (access(_tflite_filename.c_str(), F_OK) == -1) - { - std::cerr << "tflite file not found: " << _tflite_filename << "\n"; - exit(255); - } - } - } - - if (vm.count("seed")) - { - _seed = vm["seed"].as<int>(); - } - - if (vm.count("num_runs")) - { - _num_runs = vm["num_runs"].as<int>(); - if (_num_runs < 0) - { - std::cerr << "num_runs value must be greater than 0.\n"; - exit(255); - } - } -} - -} // end of namespace nnapi_test diff --git a/tests/tools/nnapi_test/src/args.h 
b/tests/tools/nnapi_test/src/args.h deleted file mode 100644 index 486fbefd5..000000000 --- a/tests/tools/nnapi_test/src/args.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __NNAPI_TEST_ARGS_H__ -#define __NNAPI_TEST_ARGS_H__ - -#include <boost/program_options.hpp> -#include <string> - -namespace po = boost::program_options; - -namespace nnapi_test -{ - -class Args -{ -public: - Args(const int argc, char **argv); - void print(char **argv); - - const std::string &getTfliteFilename(void) const { return _tflite_filename; } - const int getSeed(void) const { return _seed; } - const int getNumRuns(void) const { return _num_runs; } - -private: - void Initialize(); - void Parse(const int argc, char **argv); - -private: - po::positional_options_description _positional; - po::options_description _options; - - std::string _tflite_filename; - int _seed; - int _num_runs; -}; - -} // end of namespace nnapi_test - -#endif // __NNAPI_TEST_ARGS_H__ diff --git a/tests/tools/nnapi_test/src/nnapi_test.cc b/tests/tools/nnapi_test/src/nnapi_test.cc deleted file mode 100644 index 921d0dc42..000000000 --- a/tests/tools/nnapi_test/src/nnapi_test.cc +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" - -#include "tflite/interp/FlatBufferBuilder.h" -#include "tflite/RandomTestRunner.h" - -#include <iostream> -#include <stdexcept> - -#include "args.h" - -using namespace tflite; -using namespace nnfw::tflite; -using namespace nnapi_test; - -int main(const int argc, char **argv) -{ - Args args(argc, argv); - - const auto filename = args.getTfliteFilename(); - - StderrReporter error_reporter; - - auto model = FlatBufferModel::BuildFromFile(filename.c_str(), &error_reporter); - - if (model == nullptr) - { - // error_reporter must have shown the error message already - return 1; - } - - const nnfw::tflite::FlatBufferBuilder builder(*model); - - try - { - const auto seed = static_cast<uint32_t>(args.getSeed()); - auto runner = nnfw::tflite::RandomTestRunner::make(seed); - const auto num_runs = static_cast<size_t>(args.getNumRuns()); - runner.compile(builder); - return runner.run(num_runs); - } - catch (const std::exception &e) - { - std::cerr << e.what() << std::endl; - return 1; - } -} diff --git a/tests/tools/nnpackage_run/CMakeLists.txt b/tests/tools/nnpackage_run/CMakeLists.txt deleted file mode 100644 index ec45db4f6..000000000 --- a/tests/tools/nnpackage_run/CMakeLists.txt +++ /dev/null @@ -1,45 +0,0 @@ -if(NOT BUILD_NNPACKAGE_RUN) - return() -endif(NOT BUILD_NNPACKAGE_RUN) - -if(NOT BUILD_ONERT) - return() -endif(NOT BUILD_ONERT) - -list(APPEND NNPACKAGE_RUN_SRCS "src/nnpackage_run.cc") -list(APPEND NNPACKAGE_RUN_SRCS "src/args.cc") -list(APPEND NNPACKAGE_RUN_SRCS "src/nnfw_util.cc") -list(APPEND NNPACKAGE_RUN_SRCS "src/randomgen.cc") - -nnfw_find_package(Boost REQUIRED program_options) -nnfw_find_package(Ruy QUIET) -nnfw_find_package(HDF5 QUIET) - -if (HDF5_FOUND) - list(APPEND NNPACKAGE_RUN_SRCS "src/h5formatter.cc") -endif() - -add_executable(nnpackage_run ${NNPACKAGE_RUN_SRCS}) - -if (HDF5_FOUND) - target_compile_definitions(nnpackage_run PRIVATE ONERT_HAVE_HDF5=1) - target_include_directories(nnpackage_run PRIVATE ${HDF5_INCLUDE_DIRS}) - target_link_libraries(nnpackage_run ${HDF5_CXX_LIBRARIES}) -else() - message(WARNING "HDF5 NOT found. 
Install libhdf5-dev or set EXT_HDF5_DIR to support load/dump in nnpackage_run.") -endif(HDF5_FOUND) - -target_include_directories(nnpackage_run PRIVATE src) -target_include_directories(nnpackage_run PRIVATE ${Boost_INCLUDE_DIRS}) - -target_link_libraries(nnpackage_run onert_core onert tflite_loader) -target_link_libraries(nnpackage_run nnfw_lib_tflite jsoncpp) -target_link_libraries(nnpackage_run nnfw-dev) -target_link_libraries(nnpackage_run ${Boost_PROGRAM_OPTIONS_LIBRARY}) -target_link_libraries(nnpackage_run nnfw_lib_benchmark) -if(Ruy_FOUND AND PROFILE_RUY) - target_link_libraries(nnpackage_run ruy_instrumentation) - target_link_libraries(nnpackage_run ruy_profiler) -endif(Ruy_FOUND AND PROFILE_RUY) - -install(TARGETS nnpackage_run DESTINATION bin) diff --git a/tests/tools/onert_run/CMakeLists.txt b/tests/tools/onert_run/CMakeLists.txt new file mode 100644 index 000000000..1d536ddc0 --- /dev/null +++ b/tests/tools/onert_run/CMakeLists.txt @@ -0,0 +1,45 @@ +if(NOT BUILD_ONERT_RUN) + return() +endif(NOT BUILD_ONERT_RUN) + +if(NOT BUILD_ONERT) + return() +endif(NOT BUILD_ONERT) + +list(APPEND ONERT_RUN_SRCS "src/onert_run.cc") +list(APPEND ONERT_RUN_SRCS "src/args.cc") +list(APPEND ONERT_RUN_SRCS "src/nnfw_util.cc") +list(APPEND ONERT_RUN_SRCS "src/randomgen.cc") +list(APPEND ONERT_RUN_SRCS "src/rawformatter.cc") + +nnfw_find_package(Boost REQUIRED program_options) +nnfw_find_package(Ruy QUIET) +nnfw_find_package(HDF5 QUIET) + +if (HDF5_FOUND) + list(APPEND ONERT_RUN_SRCS "src/h5formatter.cc") +endif() + +add_executable(onert_run ${ONERT_RUN_SRCS}) + +if (HDF5_FOUND) + target_compile_definitions(onert_run PRIVATE ONERT_HAVE_HDF5=1) + target_include_directories(onert_run PRIVATE ${HDF5_INCLUDE_DIRS}) + target_link_libraries(onert_run ${HDF5_CXX_LIBRARIES}) +else() + message(WARNING "HDF5 NOT found. Install libhdf5-dev or set EXT_HDF5_DIR to support load/dump in onert_run.") +endif(HDF5_FOUND) + +target_include_directories(onert_run PRIVATE src) +target_include_directories(onert_run PRIVATE ${Boost_INCLUDE_DIRS}) + +target_link_libraries(onert_run nnfw_lib_tflite jsoncpp) +target_link_libraries(onert_run nnfw-dev) +target_link_libraries(onert_run ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries(onert_run nnfw_lib_benchmark) +if(Ruy_FOUND AND PROFILE_RUY) + target_link_libraries(onert_run ruy_instrumentation) + target_link_libraries(onert_run ruy_profiler) +endif(Ruy_FOUND AND PROFILE_RUY) + +install(TARGETS onert_run DESTINATION bin) diff --git a/tests/tools/nnpackage_run/README.md b/tests/tools/onert_run/README.md index 898cc84cf..9dc918ef9 100644 --- a/tests/tools/nnpackage_run/README.md +++ b/tests/tools/onert_run/README.md @@ -1,6 +1,6 @@ -# nnpackage_run +# onert_run -`nnpackage_run` is a tool to run `nnpackage`. +`onert_run` is a tool to run `nnpackage`. It takes `nnpackage` as input. It uses **runtime API** internally. @@ -11,7 +11,7 @@ It takes `nnpackage` as input. It uses **runtime API** internally. This will run with random input data ``` -$ ./nnpackage_run path_to_nnpackage_directory +$ ./onert_run path_to_nnpackage_directory ``` Output would look like: diff --git a/tests/tools/nnpackage_run/src/allocation.h b/tests/tools/onert_run/src/allocation.h index ea4672f9a..798bf9d06 100644 --- a/tests/tools/nnpackage_run/src/allocation.h +++ b/tests/tools/onert_run/src/allocation.h @@ -14,13 +14,13 @@ * limitations under the License. 
*/ -#ifndef __NNPACKAGE_RUN_ALLOCATION_H__ -#define __NNPACKAGE_RUN_ALLOCATION_H__ +#ifndef __ONERT_RUN_ALLOCATION_H__ +#define __ONERT_RUN_ALLOCATION_H__ #include <cstdlib> #include <cstdint> -namespace nnpkg_run +namespace onert_run { class Allocation { @@ -29,9 +29,10 @@ public: ~Allocation() { free(data_); } void *data() const { return data_; } void *alloc(uint64_t sz) { return data_ = malloc(sz); } + private: void *data_; }; -} // end of namespace +} // namespace onert_run -#endif // __NNPACKAGE_RUN_ALLOCATION_H__ +#endif // __ONERT_RUN_ALLOCATION_H__ diff --git a/tests/tools/nnpackage_run/src/args.cc b/tests/tools/onert_run/src/args.cc index 90021bff3..a64d81db5 100644 --- a/tests/tools/nnpackage_run/src/args.cc +++ b/tests/tools/onert_run/src/args.cc @@ -18,6 +18,7 @@ #include <functional> #include <iostream> +#include <sys/stat.h> #include <json/json.h> namespace @@ -56,7 +57,7 @@ std::unordered_map<uint32_t, Json::Value> argArrayToMap(const Json::Value &jsonv } // param shape_str is a form of, e.g., "[1, [2, 3], 3, []]" or "h5" -void handleShapeJsonParam(nnpkg_run::TensorShapeMap &shape_map, const std::string &shape_str) +void handleShapeJsonParam(onert_run::TensorShapeMap &shape_map, const std::string &shape_str) { Json::Value root; Json::Reader reader; @@ -93,9 +94,49 @@ void handleShapeJsonParam(nnpkg_run::TensorShapeMap &shape_map, const std::strin } } +void checkModelfile(const std::string &model_filename) +{ + if (model_filename.empty()) + { + // TODO Print usage instead of the below message + std::cerr << "Please specify model file. Run with `--help` for usage." + << "\n"; + + exit(1); + } + else + { + if (access(model_filename.c_str(), F_OK) == -1) + { + std::cerr << "Model file not found: " << model_filename << "\n"; + exit(1); + } + } +} + +void checkPackage(const std::string &package_filename) +{ + if (package_filename.empty()) + { + // TODO Print usage instead of the below message + std::cerr << "Please specify nnpackage file. Run with `--help` for usage." + << "\n"; + + exit(1); + } + else + { + if (access(package_filename.c_str(), F_OK) == -1) + { + std::cerr << "nnpackage not found: " << package_filename << "\n"; + exit(1); + } + } +} + } // namespace -namespace nnpkg_run +namespace onert_run { Args::Args(const int argc, char **argv) @@ -110,21 +151,41 @@ void Args::Initialize(void) _package_filename = package_filename; std::cerr << "Package Filename " << _package_filename << std::endl; - if (_package_filename.empty()) - { - // TODO Print usage instead of the below message - std::cerr << "Please specify nnpackage file. Run with `--help` for usage." 
- << "\n"; + checkPackage(package_filename); + }; - exit(1); - } - else + auto process_modelfile = [&](const std::string &model_filename) { + _model_filename = model_filename; + + std::cerr << "Model Filename " << _model_filename << std::endl; + checkModelfile(model_filename); + + _use_single_model = true; + }; + + auto process_path = [&](const std::string &path) { + struct stat sb; + if (stat(path.c_str(), &sb) == 0) { - if (access(_package_filename.c_str(), F_OK) == -1) + if (sb.st_mode & S_IFDIR) + { + _package_filename = path; + checkPackage(path); + std::cerr << "Package Filename " << path << std::endl; + } + else { - std::cerr << "nnpackage not found: " << _package_filename << "\n"; + _model_filename = path; + checkModelfile(path); + std::cerr << "Model Filename " << path << std::endl; + _use_single_model = true; } } + else + { + std::cerr << "Cannot find: " << path << "\n"; + exit(1); + } }; auto process_output_sizes = [&](const std::string &output_sizes_json_str) { @@ -196,43 +257,57 @@ void Args::Initialize(void) general.add_options() ("help,h", "Print available options") ("version", "Print version and exit immediately") - ("nnpackage", po::value<std::string>()->required()->notifier(process_nnpackage)) + ("nnpackage", po::value<std::string>()->notifier(process_nnpackage), "NN Package file(directory) name") + ("modelfile", po::value<std::string>()->notifier(process_modelfile), "NN Model filename") + ("path", po::value<std::string>()->notifier(process_path), "NN Package or NN Modelfile path") #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1 ("dump,d", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _dump_filename = v; }), "Output filename") ("load,l", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _load_filename = v; }), "Input filename") #endif + ("dump:raw", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _dump_raw_filename = v; }), "Raw Output filename") + ("load:raw", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _load_raw_filename = v; }), "Raw Input filename") ("output_sizes", po::value<std::string>()->notifier(process_output_sizes), "The output buffer size in JSON 1D array\n" "If not given, the model's output sizes are used\n" "e.g. '[0, 40, 2, 80]' to set 0th tensor to 40 and 2nd tensor to 80.\n") ("num_runs,r", po::value<int>()->default_value(1)->notifier([&](const auto &v) { _num_runs = v; }), "The number of runs") ("warmup_runs,w", po::value<int>()->default_value(0)->notifier([&](const auto &v) { _warmup_runs = v; }), "The number of warmup runs") - ("run_delay,t", po::value<int>()->default_value(-1)->notifier([&](const auto &v) { _run_delay = v; }), "Delay time(ms) between runs (as default no delay") + ("run_delay,t", po::value<int>()->default_value(-1)->notifier([&](const auto &v) { _run_delay = v; }), "Delay time(us) between runs (as default no delay") ("gpumem_poll,g", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _gpumem_poll = v; }), "Check gpu memory polling separately") ("mem_poll,m", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _mem_poll = v; }), "Check memory polling") ("write_report,p", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _write_report = v; }), "Write report\n" - "{exec}-{nnpkg}-{backend}.csv will be generated.\n" - "e.g. 
nnpackage_run-UNIT_Add_000-acl_cl.csv.\n"
-      "{nnpkg} name may be changed to realpath if you use symbolic-link.")
+      "{exec}-{nnpkg|modelfile}-{backend}.csv will be generated.\n"
+      "e.g. onert_run-UNIT_Add_000-acl_cl.csv.\n"
+      "{nnpkg|modelfile} name may be changed to realpath if you use symbolic-link.")
     ("shape_prepare", po::value<std::string>()->default_value("[]")->notifier(process_shape_prepare),
-      "set shape of specified tensor before compilation (before calling nnfw_prepare()).\n"
-      "'h5': read shape(s) from H5 input file. '--load' should also be provided.\n"
-      "'[0, [1, 2], 2, []]': set 0th tensor to [1, 2] and 2nd tensor to [].")
+      "Please refer to the description of 'shape_run'")
     ("shape_run", po::value<std::string>()->default_value("[]")->notifier(process_shape_run),
-      "set shape of specified tensor before running (before calling nnfw_run()).\n"
+      "'--shape_prepare: set shape of tensors before compilation (before calling nnfw_prepare()).\n"
+      "'--shape_run: set shape of tensors before running (before calling nnfw_run()).\n"
+      "Allowed value:\n"
+      "'[0, [1, 2], 2, []]': set 0th tensor to [1, 2] and 2nd tensor to [] (scalar).\n"
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
       "'h5': read shape(s) from H5 input file. '--load' should also be provided.\n"
-      "'[0, [1, 2], 2, []]': set 0th tensor to [1, 2] and 2nd tensor to [].")
+      "if '--load' option is provided but '--shape_prepare' or '--shape_run' is not provided,\n"
+      "'--shape_run h5' will be used by default.\n"
+#endif
+      "For detailed description, please consult the description of nnfw_set_input_tensorinfo()\n"
+      )
     ("verbose_level,v", po::value<int>()->default_value(0)->notifier([&](const auto &v) { _verbose_level = v; }),
      "Verbose level\n"
      "0: prints the only result. Messages btw run don't print\n"
      "1: prints result and message btw run\n"
      "2: prints all of messages to print\n")
+    ("quantize,q", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _quantize = v; }), "Request quantization with type (int8 or int16)")
+    ("qpath", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _quantized_model_path = v; }),
+      "Path to export quantized model.\n"
+      "If it is not set, the quantized model will be exported to the same directory of the original model/package with q8/q16 suffix.")
     ;
   // clang-format on

   _options.add(general);
-  _positional.add("nnpackage", 1);
+  _positional.add("path", -1);
 }

 void Args::Parse(const int argc, char **argv)
 {
@@ -241,19 +316,9 @@ void Args::Parse(const int argc, char **argv)
   po::store(po::command_line_parser(argc, argv).options(_options).positional(_positional).run(),
             vm);

-  {
-    auto conflicting_options = [&](const std::string &o1, const std::string &o2) {
-      if ((vm.count(o1) && !vm[o1].defaulted()) && (vm.count(o2) && !vm[o2].defaulted()))
-      {
-        throw boost::program_options::error(std::string("Two options '") + o1 + "' and '" + o2 +
-                                            "' cannot be given at once.");
-      }
-    };
-  }
-
   if (vm.count("help"))
   {
-    std::cout << "nnpackage_run\n\n";
+    std::cout << "onert_run\n\n";
     std::cout << "Usage: " << argv[0] << " path to nnpackage root directory [<options>]\n\n";
     std::cout << _options;
     std::cout << "\n";
@@ -267,6 +332,29 @@ void Args::Parse(const int argc, char **argv)
     return;
   }

+  {
+    auto conflicting_options = [&](const std::string &o1, const std::string &o2) {
+      if ((vm.count(o1) && !vm[o1].defaulted()) && (vm.count(o2) && !vm[o2].defaulted()))
+      {
+        throw boost::program_options::error(std::string("Two options '") + o1 + "' and '" + o2 +
+                                            "' cannot be given at once.");
+      }
+    };
+
+    // calling, e.g., "onert_run .. --shape_prepare .. --shape_run .." should theoretically
+    // work but allowing both options together on command line makes the usage and implementation
+    // of onert_run too complicated. Therefore let's not allow those options together.
+    conflicting_options("shape_prepare", "shape_run");
+
+    // Cannot use both single model file and nnpackage at once
+    conflicting_options("modelfile", "nnpackage");
+
+    // Require modelfile, nnpackage, or path
+    if (!vm.count("modelfile") && !vm.count("nnpackage") && !vm.count("path"))
+      throw boost::program_options::error(
+        std::string("Require one of options modelfile, nnpackage, or path."));
+  }
+
   try
   {
     po::notify(vm);
@@ -288,4 +376,18 @@ void Args::Parse(const int argc, char **argv)
   }
 }

-} // end of namespace nnpkg_run
+bool Args::shapeParamProvided()
+{
+  bool provided = false;
+#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
+  // "--shape_run h5" or "--shape_prepare h5" was provided
+  provided = (getWhenToUseH5Shape() != WhenToUseH5Shape::NOT_PROVIDED);
+#endif
+  // specific shape was provided
+  // e.g., "--shape_run '[0, [10, 1]]'" or "--shape_prepare '[0, [10, 1]]'"
+  provided |= (!getShapeMapForPrepare().empty()) || (!getShapeMapForRun().empty());
+
+  return provided;
+}
+
+} // end of namespace onert_run
diff --git a/tests/tools/nnpackage_run/src/args.h b/tests/tools/onert_run/src/args.h
index d2b33fc82..97d9b1af1 100644
--- a/tests/tools/nnpackage_run/src/args.h
+++ b/tests/tools/onert_run/src/args.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */

-#ifndef __NNPACKAGE_RUN_ARGS_H__
-#define __NNPACKAGE_RUN_ARGS_H__
+#ifndef __ONERT_RUN_ARGS_H__
+#define __ONERT_RUN_ARGS_H__

 #include <string>
 #include <unordered_map>
@@ -26,7 +26,7 @@

 namespace po = boost::program_options;

-namespace nnpkg_run
+namespace onert_run
 {
 using TensorShapeMap = std::unordered_map<uint32_t, TensorShape>;

@@ -34,9 +34,9 @@ using TensorShapeMap = std::unordered_map<uint32_t, TensorShape>;
 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
 enum class WhenToUseH5Shape
 {
-  DO_NOT_USE, // don't use shapes in h5 file
-  PREPARE, // read shapes in h5 file and set them as inputs' shape before calling nnfw_prepare()
-  RUN, // read shapes in h5 file and set them as inputs' shape before calling nnfw_run()
+  NOT_PROVIDED, // Param not provided
+  PREPARE, // read shapes in h5 file and set them as inputs' shape before calling nnfw_prepare()
+  RUN, // read shapes in h5 file and set them as inputs' shape before calling nnfw_run()
 };
 #endif

@@ -47,11 +47,15 @@ public:
   void print(void);

   const std::string &getPackageFilename(void) const { return _package_filename; }
+  const std::string &getModelFilename(void) const { return _model_filename; }
+  const bool useSingleModel(void) const { return _use_single_model; }
 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
   const std::string &getDumpFilename(void) const { return _dump_filename; }
   const std::string &getLoadFilename(void) const { return _load_filename; }
   WhenToUseH5Shape getWhenToUseH5Shape(void) const { return _when_to_use_h5_shape; }
 #endif
+  const std::string &getDumpRawFilename(void) const { return _dump_raw_filename; }
+  const std::string &getLoadRawFilename(void) const { return _load_raw_filename; }
   const int getNumRuns(void) const { return _num_runs; }
   const int getWarmupRuns(void) const { return _warmup_runs; }
   const int getRunDelay(void) const { return _run_delay; }
@@ -62,7 +66,11 @@ public:
   const bool printVersion(void) const { return _print_version; }
TensorShapeMap &getShapeMapForPrepare() { return _shape_prepare; } TensorShapeMap &getShapeMapForRun() { return _shape_run; } + /// @brief Return true if "--shape_run" or "--shape_prepare" is provided + bool shapeParamProvided(); const int getVerboseLevel(void) const { return _verbose_level; } + const std::string &getQuantize(void) const { return _quantize; } + const std::string &getQuantizedModelPath(void) const { return _quantized_model_path; } private: void Initialize(); @@ -73,11 +81,14 @@ private: po::options_description _options; std::string _package_filename; + std::string _model_filename; #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1 std::string _dump_filename; std::string _load_filename; - WhenToUseH5Shape _when_to_use_h5_shape = WhenToUseH5Shape::DO_NOT_USE; + WhenToUseH5Shape _when_to_use_h5_shape = WhenToUseH5Shape::NOT_PROVIDED; #endif + std::string _dump_raw_filename; + std::string _load_raw_filename; TensorShapeMap _shape_prepare; TensorShapeMap _shape_run; int _num_runs; @@ -89,8 +100,11 @@ private: bool _write_report; bool _print_version = false; int _verbose_level; + bool _use_single_model = false; + std::string _quantize; + std::string _quantized_model_path; }; -} // end of namespace nnpkg_run +} // end of namespace onert_run -#endif // __NNPACKAGE_RUN_ARGS_H__ +#endif // __ONERT_RUN_ARGS_H__ diff --git a/tests/tools/onert_run/src/formatter.h b/tests/tools/onert_run/src/formatter.h new file mode 100644 index 000000000..5b73d2337 --- /dev/null +++ b/tests/tools/onert_run/src/formatter.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_RUN_FORMATTER_H__ +#define __ONERT_RUN_FORMATTER_H__ + +#include <string> +#include <vector> + +#include "types.h" +#include "allocation.h" + +struct nnfw_session; + +namespace onert_run +{ +class Formatter +{ +public: + virtual ~Formatter() = default; + Formatter(nnfw_session *sess) : session_(sess) {} + virtual void loadInputs(const std::string &filename, std::vector<Allocation> &inputs) = 0; + virtual void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) = 0; + virtual std::vector<TensorShape> readTensorShapes(const std::string &filename) + { + return std::vector<TensorShape>(); + }; + +protected: + nnfw_session *session_; +}; +} // namespace onert_run + +#endif // __ONERT_RUN_FORMATTER_H__ diff --git a/tests/tools/nnpackage_run/src/h5formatter.cc b/tests/tools/onert_run/src/h5formatter.cc index 3929c8d90..5ea6e4c4a 100644 --- a/tests/tools/nnpackage_run/src/h5formatter.cc +++ b/tests/tools/onert_run/src/h5formatter.cc @@ -24,7 +24,7 @@ namespace { -nnpkg_run::TensorShape getShape(H5::DataSet &data_set) +onert_run::TensorShape getShape(H5::DataSet &data_set) { std::vector<hsize_t> h5_shape; // hsize_t is unsigned long long H5::DataSpace data_space = data_set.getSpace(); @@ -34,7 +34,7 @@ nnpkg_run::TensorShape getShape(H5::DataSet &data_set) // read shape info from H5 file data_space.getSimpleExtentDims(h5_shape.data(), NULL); - nnpkg_run::TensorShape shape; + onert_run::TensorShape shape; for (auto dim : h5_shape) shape.emplace_back(static_cast<int>(dim)); @@ -42,7 +42,7 @@ nnpkg_run::TensorShape getShape(H5::DataSet &data_set) } } // namespace -namespace nnpkg_run +namespace onert_run { static const char *h5_value_grpname = "value"; @@ -135,10 +135,18 @@ void H5Formatter::loadInputs(const std::string &filename, std::vector<Allocation data_set.read(inputs[i].data(), H5::PredType::NATIVE_UINT8); else throw std::runtime_error( - "model input type is qasymm8, bool or uint8. But h5 data type is different."); + "model input type is qasymm8, bool or uint8. But h5 data type is different."); break; + case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED: + if (type == H5::PredType::STD_I8BE || type == H5::PredType::STD_I8LE) + data_set.read(inputs[i].data(), H5::PredType::NATIVE_INT8); + else + throw std::runtime_error("model input type is int8. 
But h5 data type is different."); + break; + case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED: + throw std::runtime_error("NYI for NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED type"); default: - throw std::runtime_error("nnpkg_run can load f32, i32, qasymm8, bool and uint8."); + throw std::runtime_error("onert_run can load f32, i32, qasymm8, bool and uint8."); } NNPR_ENSURE_STATUS(nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), bufsz)); NNPR_ENSURE_STATUS(nnfw_set_input_layout(session_, i, NNFW_LAYOUT_CHANNELS_LAST)); @@ -188,21 +196,21 @@ void H5Formatter::dumpOutputs(const std::string &filename, std::vector<Allocatio case NNFW_TYPE_TENSOR_FLOAT32: { H5::DataSet data_set = - value_group.createDataSet(std::to_string(i), H5::PredType::IEEE_F32BE, data_space); + value_group.createDataSet(std::to_string(i), H5::PredType::IEEE_F32BE, data_space); data_set.write(outputs[i].data(), H5::PredType::NATIVE_FLOAT); break; } case NNFW_TYPE_TENSOR_INT32: { H5::DataSet data_set = - value_group.createDataSet(std::to_string(i), H5::PredType::STD_I32LE, data_space); + value_group.createDataSet(std::to_string(i), H5::PredType::STD_I32LE, data_space); data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT32); break; } case NNFW_TYPE_TENSOR_INT64: { H5::DataSet data_set = - value_group.createDataSet(std::to_string(i), H5::PredType::STD_I64LE, data_space); + value_group.createDataSet(std::to_string(i), H5::PredType::STD_I64LE, data_space); data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT64); break; } @@ -210,19 +218,28 @@ void H5Formatter::dumpOutputs(const std::string &filename, std::vector<Allocatio case NNFW_TYPE_TENSOR_QUANT8_ASYMM: { H5::DataSet data_set = - value_group.createDataSet(std::to_string(i), H5::PredType::STD_U8BE, data_space); + value_group.createDataSet(std::to_string(i), H5::PredType::STD_U8BE, data_space); data_set.write(outputs[i].data(), H5::PredType::NATIVE_UINT8); break; } case NNFW_TYPE_TENSOR_BOOL: { H5::DataSet data_set = - value_group.createDataSet(std::to_string(i), H5::PredType::STD_U8LE, data_space); + value_group.createDataSet(std::to_string(i), H5::PredType::STD_U8LE, data_space); + data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT8); + break; + } + case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED: + { + H5::DataSet data_set = + value_group.createDataSet(std::to_string(i), H5::PredType::STD_I8LE, data_space); data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT8); break; } + case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED: + throw std::runtime_error("NYI for NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED type"); default: - throw std::runtime_error("nnpkg_run can dump f32, i32, qasymm8, bool and uint8."); + throw std::runtime_error("onert_run can dump f32, i32, qasymm8, bool and uint8."); } } } @@ -233,9 +250,9 @@ void H5Formatter::dumpOutputs(const std::string &filename, std::vector<Allocatio } catch (const std::runtime_error &e) { - std::cerr << "Error during dumpOutputs on nnpackage_run : " << e.what() << std::endl; + std::cerr << "Error during dumpOutputs on onert_run : " << e.what() << std::endl; std::exit(-1); } }; -} // end of namespace nnpkg_run +} // end of namespace onert_run diff --git a/tests/tools/nnpackage_run/src/h5formatter.h b/tests/tools/onert_run/src/h5formatter.h index 203ba0e72..7ebb33f2e 100644 --- a/tests/tools/nnpackage_run/src/h5formatter.h +++ b/tests/tools/onert_run/src/h5formatter.h @@ -14,30 +14,28 @@ * limitations under the License. 
*/ -#ifndef __NNPACKAGE_RUN_H5FORMATTER_H__ -#define __NNPACKAGE_RUN_H5FORMATTER_H__ +#ifndef __ONERT_RUN_H5FORMATTER_H__ +#define __ONERT_RUN_H5FORMATTER_H__ + +#include "allocation.h" +#include "formatter.h" +#include "types.h" #include <string> #include <vector> -#include "types.h" -#include "allocation.h" - struct nnfw_session; -namespace nnpkg_run +namespace onert_run { -class H5Formatter +class H5Formatter : public Formatter { public: - H5Formatter(nnfw_session *sess) : session_(sess) {} - std::vector<TensorShape> readTensorShapes(const std::string &filename); - void loadInputs(const std::string &filename, std::vector<Allocation> &inputs); - void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs); - -private: - nnfw_session *session_; + H5Formatter(nnfw_session *sess) : Formatter(sess) {} + std::vector<TensorShape> readTensorShapes(const std::string &filename) override; + void loadInputs(const std::string &filename, std::vector<Allocation> &inputs) override; + void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) override; }; -} // end of namespace +} // namespace onert_run -#endif // __NNPACKAGE_RUN_H5FORMATTER_H__ +#endif // __ONERT_RUN_H5FORMATTER_H__ diff --git a/tests/tools/nnpackage_run/src/nnfw_util.cc b/tests/tools/onert_run/src/nnfw_util.cc index 01e72f99e..0a21395fd 100644 --- a/tests/tools/nnpackage_run/src/nnfw_util.cc +++ b/tests/tools/onert_run/src/nnfw_util.cc @@ -18,7 +18,7 @@ #include <string> #include "nnfw.h" -namespace nnpkg_run +namespace onert_run { uint64_t num_elems(const nnfw_tensorinfo *ti) { @@ -34,15 +34,16 @@ uint64_t num_elems(const nnfw_tensorinfo *ti) uint64_t bufsize_for(const nnfw_tensorinfo *ti) { static int elmsize[] = { - sizeof(float), /* NNFW_TYPE_TENSOR_FLOAT32 */ - sizeof(int), /* NNFW_TYPE_TENSOR_INT32 */ - sizeof(uint8_t), /* NNFW_TYPE_TENSOR_QUANT8_ASYMM */ - sizeof(bool), /* NNFW_TYPE_TENSOR_BOOL = 3 */ - sizeof(uint8_t), /* NNFW_TYPE_TENSOR_UINT8 = 4 */ - sizeof(int64_t), /* NNFW_TYPE_TENSOR_INT64 = 5 */ - + sizeof(float), /* NNFW_TYPE_TENSOR_FLOAT32 */ + sizeof(int), /* NNFW_TYPE_TENSOR_INT32 */ + sizeof(uint8_t), /* NNFW_TYPE_TENSOR_QUANT8_ASYMM */ + sizeof(bool), /* NNFW_TYPE_TENSOR_BOOL = 3 */ + sizeof(uint8_t), /* NNFW_TYPE_TENSOR_UINT8 = 4 */ + sizeof(int64_t), /* NNFW_TYPE_TENSOR_INT64 = 5 */ + sizeof(int8_t), /* NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED = 6 */ + sizeof(int16_t), /* NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED = 7 */ }; return elmsize[ti->dtype] * num_elems(ti); } -} // end of namespace +} // namespace onert_run diff --git a/tests/tools/nnpackage_run/src/nnfw_util.h b/tests/tools/onert_run/src/nnfw_util.h index 6fe547eca..1fcdfdf19 100644 --- a/tests/tools/nnpackage_run/src/nnfw_util.h +++ b/tests/tools/onert_run/src/nnfw_util.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef __NNPACKAGE_RUN_NNFW_UTIL_H__ -#define __NNPACKAGE_RUN_NNFW_UTIL_H__ +#ifndef __ONERT_RUN_NNFW_UTIL_H__ +#define __ONERT_RUN_NNFW_UTIL_H__ #include "nnfw.h" @@ -28,10 +28,10 @@ } \ } while (0) -namespace nnpkg_run +namespace onert_run { uint64_t num_elems(const nnfw_tensorinfo *ti); uint64_t bufsize_for(const nnfw_tensorinfo *ti); -} // end of namespace nnpkg_run +} // end of namespace onert_run -#endif // __NNPACKAGE_UTIL_H__ +#endif // __ONERT_RUN_NNFW_UTIL_H__ diff --git a/tests/tools/nnpackage_run/src/nnpackage_run.cc b/tests/tools/onert_run/src/onert_run.cc index a78e144d8..0bc64bb2b 100644 --- a/tests/tools/nnpackage_run/src/nnpackage_run.cc +++ b/tests/tools/onert_run/src/onert_run.cc @@ -23,11 +23,14 @@ #include "nnfw.h" #include "nnfw_util.h" #include "nnfw_internal.h" +#include "nnfw_experimental.h" #include "randomgen.h" +#include "rawformatter.h" #ifdef RUY_PROFILER #include "ruy/profiler/profiler.h" #endif +#include <boost/program_options.hpp> #include <cassert> #include <chrono> #include <cstdlib> @@ -39,26 +42,52 @@ static const char *default_backend_cand = "cpu"; -void overwriteShapeMap(nnpkg_run::TensorShapeMap &shape_map, - std::vector<nnpkg_run::TensorShape> shapes) +void overwriteShapeMap(onert_run::TensorShapeMap &shape_map, + std::vector<onert_run::TensorShape> shapes) { for (uint32_t i = 0; i < shapes.size(); i++) shape_map[i] = shapes[i]; } +std::string genQuantizedModelPathFromModelPath(const std::string &model_path, bool is_q16) +{ + auto const extension_pos = model_path.find(".circle"); + if (extension_pos == std::string::npos) + { + std::cerr << "Input model isn't .circle." << std::endl; + exit(-1); + } + auto const qstring = std::string("_quantized_") + (is_q16 ? "q16" : "q8"); + return model_path.substr(0, extension_pos) + qstring + ".circle"; +} + +std::string genQuantizedModelPathFromPackagePath(const std::string &package_path, bool is_q16) +{ + auto package_path_without_slash = package_path; + if (package_path_without_slash.back() == '/') + package_path_without_slash.pop_back(); + auto package_name_pos = package_path_without_slash.find_last_of('/'); + if (package_name_pos == std::string::npos) + package_name_pos = 0; + else + package_name_pos++; + auto package_name = package_path_without_slash.substr(package_name_pos); + auto const qstring = std::string("_quantized_") + (is_q16 ? "q16" : "q8"); + return package_path_without_slash + "/" + package_name + qstring + ".circle"; +} + int main(const int argc, char **argv) { - using namespace nnpkg_run; + using namespace onert_run; try { Args args(argc, argv); - auto nnpackage_path = args.getPackageFilename(); if (args.printVersion()) { uint32_t version; NNPR_ENSURE_STATUS(nnfw_query_info_u32(NULL, NNFW_INFO_ID_VERSION, &version)); - std::cout << "nnpkg_run (nnfw runtime: v" << (version >> 24) << "." + std::cout << "onert_run (nnfw runtime: v" << (version >> 24) << "." << ((version & 0x0000FF00) >> 8) << "." 
<< (version & 0xFF) << ")" << std::endl; exit(0); } @@ -70,16 +99,51 @@ int main(const int argc, char **argv) // TODO Apply verbose level to phases const int verbose = args.getVerboseLevel(); benchmark::Phases phases( - benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()}); + benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()}); nnfw_session *session = nullptr; NNPR_ENSURE_STATUS(nnfw_create_session(&session)); // ModelLoad phases.run("MODEL_LOAD", [&](const benchmark::Phase &, uint32_t) { - NNPR_ENSURE_STATUS(nnfw_load_model_from_file(session, nnpackage_path.c_str())); + if (args.useSingleModel()) + NNPR_ENSURE_STATUS( + nnfw_load_model_from_modelfile(session, args.getModelFilename().c_str())); + else + NNPR_ENSURE_STATUS(nnfw_load_model_from_file(session, args.getPackageFilename().c_str())); }); + // Quantize model + auto quantize = args.getQuantize(); + if (!quantize.empty()) + { + NNFW_QUANTIZE_TYPE quantize_type = NNFW_QUANTIZE_TYPE_NOT_SET; + if (quantize == "int8") + quantize_type = NNFW_QUANTIZE_TYPE_U8_ASYM; + if (quantize == "int16") + quantize_type = NNFW_QUANTIZE_TYPE_I16_SYM; + NNPR_ENSURE_STATUS(nnfw_set_quantization_type(session, quantize_type)); + + if (args.getQuantizedModelPath() != "") + NNPR_ENSURE_STATUS( + nnfw_set_quantized_model_path(session, args.getQuantizedModelPath().c_str())); + else + { + if (args.useSingleModel()) + NNPR_ENSURE_STATUS(nnfw_set_quantized_model_path( + session, + genQuantizedModelPathFromModelPath(args.getModelFilename(), quantize == "int16") + .c_str())); + else + NNPR_ENSURE_STATUS(nnfw_set_quantized_model_path( + session, + genQuantizedModelPathFromPackagePath(args.getPackageFilename(), quantize == "int16") + .c_str())); + } + + NNPR_ENSURE_STATUS(nnfw_quantize(session)); + } + char *available_backends = std::getenv("BACKENDS"); if (available_backends) NNPR_ENSURE_STATUS(nnfw_set_available_backends(session, available_backends)); @@ -97,7 +161,7 @@ int main(const int argc, char **argv) nnfw_tensorinfo ti; NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, i, &ti)); - if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_INT64) + if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED) { std::cerr << "E: not supported input type" << std::endl; exit(-1); @@ -114,7 +178,7 @@ int main(const int argc, char **argv) nnfw_tensorinfo ti; NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti)); - if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_INT64) + if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED) { std::cerr << "E: not supported output type" << std::endl; exit(-1); @@ -131,6 +195,25 @@ int main(const int argc, char **argv) // to fill dtype NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, ind, &ti)); + bool set_input = false; + if (ti.rank != shape.size()) + { + set_input = true; + } + else + { + for (int i = 0; i < ti.rank; i++) + { + if (ti.dims[i] != shape.at(i)) + { + set_input = true; + break; + } + } + } + if (!set_input) + continue; + ti.rank = shape.size(); for (int i = 0; i < ti.rank; i++) ti.dims[i] = shape.at(i); @@ -143,11 +226,15 @@ int main(const int argc, char **argv) // set input shape before compilation #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1 + + auto fill_shape_from_h5 = [&session](const std::string &h5_file, TensorShapeMap &shape_map) { + assert(!h5_file.empty()); + auto shapes = 
H5Formatter(session).readTensorShapes(h5_file); + overwriteShapeMap(shape_map, shapes); + }; + if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::PREPARE) - { - auto shapes = H5Formatter(session).readTensorShapes(args.getLoadFilename()); - overwriteShapeMap(args.getShapeMapForPrepare(), shapes); - } + fill_shape_from_h5(args.getLoadFilename(), args.getShapeMapForPrepare()); #endif setTensorInfo(args.getShapeMapForPrepare()); @@ -160,11 +247,9 @@ int main(const int argc, char **argv) // set input shape after compilation and before execution #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1 - if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::RUN) - { - auto shapes = H5Formatter(session).readTensorShapes(args.getLoadFilename()); - overwriteShapeMap(args.getShapeMapForRun(), shapes); - } + if (args.getWhenToUseH5Shape() == WhenToUseH5Shape::RUN || + (!args.getLoadFilename().empty() && !args.shapeParamProvided())) + fill_shape_from_h5(args.getLoadFilename(), args.getShapeMapForRun()); #endif setTensorInfo(args.getShapeMapForRun()); @@ -173,10 +258,15 @@ int main(const int argc, char **argv) #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1 if (!args.getLoadFilename().empty()) H5Formatter(session).loadInputs(args.getLoadFilename(), inputs); + else if (!args.getLoadRawFilename().empty()) + RawFormatter(session).loadInputs(args.getLoadRawFilename(), inputs); else RandomGenerator(session).generate(inputs); #else - RandomGenerator(session).generate(inputs); + if (!args.getLoadRawFilename().empty()) + RawFormatter(session).loadInputs(args.getLoadRawFilename(), inputs); + else + RandomGenerator(session).generate(inputs); #endif // prepare output @@ -202,7 +292,7 @@ int main(const int argc, char **argv) } outputs[i].alloc(output_size_in_bytes); NNPR_ENSURE_STATUS( - nnfw_set_output(session, i, ti.dtype, outputs[i].data(), output_size_in_bytes)); + nnfw_set_output(session, i, ti.dtype, outputs[i].data(), output_size_in_bytes)); NNPR_ENSURE_STATUS(nnfw_set_output_layout(session, i, NNFW_LAYOUT_CHANNELS_LAST)); } @@ -210,31 +300,35 @@ int main(const int argc, char **argv) // only warmup. if (verbose == 0) { - phases.run("WARMUP", - [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); }, - args.getWarmupRuns()); - phases.run("EXECUTE", - [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); }, - args.getNumRuns(), true); + phases.run( + "WARMUP", + [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); }, + args.getWarmupRuns()); + phases.run( + "EXECUTE", + [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); }, + args.getNumRuns(), true); } else { - phases.run("WARMUP", - [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); }, - [&](const benchmark::Phase &phase, uint32_t nth) { - std::cout << "... " - << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms" - << std::endl; - }, - args.getWarmupRuns()); - phases.run("EXECUTE", - [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); }, - [&](const benchmark::Phase &phase, uint32_t nth) { - std::cout << "... " - << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms" - << std::endl; - }, - args.getNumRuns(), true); + phases.run( + "WARMUP", + [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); }, + [&](const benchmark::Phase &phase, uint32_t nth) { + std::cout << "... 
" + << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms" + << std::endl; + }, + args.getWarmupRuns()); + phases.run( + "EXECUTE", + [&](const benchmark::Phase &, uint32_t) { NNPR_ENSURE_STATUS(nnfw_run(session)); }, + [&](const benchmark::Phase &phase, uint32_t nth) { + std::cout << "... " + << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms" + << std::endl; + }, + args.getNumRuns(), true); } #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1 @@ -242,6 +336,8 @@ int main(const int argc, char **argv) if (!args.getDumpFilename().empty()) H5Formatter(session).dumpOutputs(args.getDumpFilename(), outputs); #endif + if (!args.getDumpRawFilename().empty()) + RawFormatter(session).dumpOutputs(args.getDumpRawFilename(), outputs); NNPR_ENSURE_STATUS(nnfw_close_session(session)); @@ -263,14 +359,15 @@ int main(const int argc, char **argv) std::string backend_name = (available_backends) ? available_backends : default_backend_cand; { char buf[PATH_MAX]; - char *res = realpath(nnpackage_path.c_str(), buf); + char *res = args.useSingleModel() ? realpath(args.getModelFilename().c_str(), buf) + : realpath(args.getPackageFilename().c_str(), buf); if (res) { nnpkg_basename = basename(buf); } else { - std::cerr << "E: during getting realpath from nnpackage_path." << std::endl; + std::cerr << "E: during getting realpath from nnpackage or model path." << std::endl; exit(-1); } exec_basename = basename(argv[0]); @@ -280,6 +377,11 @@ int main(const int argc, char **argv) return 0; } + catch (boost::program_options::error &e) + { + std::cerr << "E: " << e.what() << std::endl; + exit(-1); + } catch (std::runtime_error &e) { std::cerr << "E: Fail to run by runtime error:" << e.what() << std::endl; diff --git a/tests/tools/nnpackage_run/src/randomgen.cc b/tests/tools/onert_run/src/randomgen.cc index 343242081..1a8a5045d 100644 --- a/tests/tools/nnpackage_run/src/randomgen.cc +++ b/tests/tools/onert_run/src/randomgen.cc @@ -21,7 +21,7 @@ #include <iostream> -namespace nnpkg_run +namespace onert_run { template <class T> void randomData(nnfw::misc::RandomGenerator &randgen, void *data, uint64_t size) @@ -61,14 +61,17 @@ void RandomGenerator::generate(std::vector<Allocation> &inputs) case NNFW_TYPE_TENSOR_INT64: randomData<int64_t>(randgen, inputs[i].data(), num_elems(&ti)); break; + case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED: + randomData<int16_t>(randgen, inputs[i].data(), num_elems(&ti)); + break; default: std::cerr << "Not supported input type" << std::endl; std::exit(-1); } NNPR_ENSURE_STATUS( - nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), input_size_in_bytes)); + nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), input_size_in_bytes)); NNPR_ENSURE_STATUS(nnfw_set_input_layout(session_, i, NNFW_LAYOUT_CHANNELS_LAST)); } }; -} // end of namespace nnpkg_run +} // end of namespace onert_run diff --git a/tests/tools/nnpackage_run/src/randomgen.h b/tests/tools/onert_run/src/randomgen.h index 9ca51dd11..58afb4171 100644 --- a/tests/tools/nnpackage_run/src/randomgen.h +++ b/tests/tools/onert_run/src/randomgen.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef __NNPACKAGE_RUN_RANDOMGEN_H__ -#define __NNPACKAGE_RUN_RANDOMGEN_H__ +#ifndef __ONERT_RUN_RANDOMGEN_H__ +#define __ONERT_RUN_RANDOMGEN_H__ #include <string> #include <vector> @@ -24,7 +24,7 @@ struct nnfw_session; -namespace nnpkg_run +namespace onert_run { class RandomGenerator { @@ -35,6 +35,6 @@ public: private: nnfw_session *session_; }; -} // end of namespace +} // namespace onert_run -#endif // __NNPACKAGE_RUN_RANDOMGEN_H__ +#endif // __ONERT_RUN_RANDOMGEN_H__ diff --git a/tests/tools/onert_run/src/rawformatter.cc b/tests/tools/onert_run/src/rawformatter.cc new file mode 100644 index 000000000..7cfab9904 --- /dev/null +++ b/tests/tools/onert_run/src/rawformatter.cc @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "rawformatter.h" +#include "nnfw.h" +#include "nnfw_util.h" + +#include <iostream> +#include <fstream> +#include <stdexcept> + +namespace onert_run +{ +void RawFormatter::loadInputs(const std::string &filename, std::vector<Allocation> &inputs) +{ + uint32_t num_inputs; + NNPR_ENSURE_STATUS(nnfw_input_size(session_, &num_inputs)); + + // Support multiple inputs + // Option 1: Get comman-separated input file list like --load:raw a,b,c + // Option 2: Get prefix --load:raw in + // Internally access in.0, in.1, in.2, ... in.{N-1} where N is determined by nnfw info + // query api. + // + // Currently Option 2 is implemented. + try + { + for (uint32_t i = 0; i < num_inputs; ++i) + { + nnfw_tensorinfo ti; + NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session_, i, &ti)); + + // allocate memory for data + auto bufsz = bufsize_for(&ti); + inputs[i].alloc(bufsz); + + std::ifstream file(filename + "." + std::to_string(i), std::ios::ate | std::ios::binary); + auto filesz = file.tellg(); + if (bufsz != filesz) + { + throw std::runtime_error("Input " + std::to_string(i) + + " size does not match: " + std::to_string(bufsz) + + " expected, but " + std::to_string(filesz) + " provided."); + } + file.seekg(0, std::ios::beg); + file.read(reinterpret_cast<char *>(inputs[i].data()), filesz); + file.close(); + + NNPR_ENSURE_STATUS(nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), bufsz)); + NNPR_ENSURE_STATUS(nnfw_set_input_layout(session_, i, NNFW_LAYOUT_CHANNELS_LAST)); + } + } + catch (const std::exception &e) + { + std::cerr << e.what() << std::endl; + std::exit(-1); + } +}; + +void RawFormatter::dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) +{ + uint32_t num_outputs; + NNPR_ENSURE_STATUS(nnfw_output_size(session_, &num_outputs)); + try + { + for (uint32_t i = 0; i < num_outputs; i++) + { + nnfw_tensorinfo ti; + NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session_, i, &ti)); + auto bufsz = bufsize_for(&ti); + + std::ofstream file(filename + "." + std::to_string(i), std::ios::out | std::ios::binary); + file.write(reinterpret_cast<const char *>(outputs[i].data()), bufsz); + file.close(); + std::cerr << filename + "." 
+ std::to_string(i) + " is generated.\n"; + } + } + catch (const std::runtime_error &e) + { + std::cerr << "Error during dumpOutputs on onert_run : " << e.what() << std::endl; + std::exit(-1); + } +} +} // end of namespace onert_run diff --git a/tests/tools/onert_run/src/rawformatter.h b/tests/tools/onert_run/src/rawformatter.h new file mode 100644 index 000000000..b6eaab66d --- /dev/null +++ b/tests/tools/onert_run/src/rawformatter.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_RUN_RAWFORMATTER_H__ +#define __ONERT_RUN_RAWFORMATTER_H__ + +#include "allocation.h" +#include "formatter.h" +#include "types.h" + +#include <string> +#include <vector> + +struct nnfw_session; + +namespace onert_run +{ +class RawFormatter : public Formatter +{ +public: + RawFormatter(nnfw_session *sess) : Formatter(sess) {} + void loadInputs(const std::string &filename, std::vector<Allocation> &inputs) override; + void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) override; +}; +} // namespace onert_run + +#endif // __ONERT_RUN_RAWFORMATTER_H__ diff --git a/tests/tools/nnpackage_run/src/types.h b/tests/tools/onert_run/src/types.h index 93a7ab230..563c5e488 100644 --- a/tests/tools/nnpackage_run/src/types.h +++ b/tests/tools/onert_run/src/types.h @@ -14,14 +14,14 @@ * limitations under the License. */ -#ifndef __NNPACKAGE_RUN_TYPES_H__ -#define __NNPACKAGE_RUN_TYPES_H__ +#ifndef __ONERT_RUN_TYPES_H__ +#define __ONERT_RUN_TYPES_H__ -namespace nnpkg_run +namespace onert_run { using TensorShape = std::vector<int>; -} // end of namespace nnpkg_run +} // end of namespace onert_run -#endif // __NNPACKAGE_RUN_TYPES_H__ +#endif // __ONERT_RUN_TYPES_H__ diff --git a/tests/tools/onert_train/CMakeLists.txt b/tests/tools/onert_train/CMakeLists.txt new file mode 100644 index 000000000..f047b2ad0 --- /dev/null +++ b/tests/tools/onert_train/CMakeLists.txt @@ -0,0 +1,60 @@ +if(NOT BUILD_ONERT_TRAIN) + return() +endif(NOT BUILD_ONERT_TRAIN) + +if(NOT BUILD_ONERT) + return() +endif(NOT BUILD_ONERT) + +list(APPEND ONERT_TRAIN_SRCS "src/onert_train.cc") +list(APPEND ONERT_TRAIN_SRCS "src/args.cc") +list(APPEND ONERT_TRAIN_SRCS "src/nnfw_util.cc") +list(APPEND ONERT_TRAIN_SRCS "src/randomgen.cc") +list(APPEND ONERT_TRAIN_SRCS "src/rawformatter.cc") +list(APPEND ONERT_TRAIN_SRCS "src/rawdataloader.cc") + +nnfw_find_package(Boost REQUIRED program_options) +nnfw_find_package(HDF5 QUIET) + +if (HDF5_FOUND) + list(APPEND ONERT_TRAIN_SRCS "src/h5formatter.cc") +endif() + +add_executable(onert_train ${ONERT_TRAIN_SRCS}) + +if (HDF5_FOUND) + target_compile_definitions(onert_train PRIVATE ONERT_HAVE_HDF5=1) + target_include_directories(onert_train PRIVATE ${HDF5_INCLUDE_DIRS}) + target_link_libraries(onert_train ${HDF5_CXX_LIBRARIES}) +else() + message(WARNING "HDF5 NOT found. 
Install libhdf5-dev or set EXT_HDF5_DIR to support load/dump in onert_train.")
+endif(HDF5_FOUND)
+
+target_include_directories(onert_train PRIVATE src)
+target_include_directories(onert_train PRIVATE ${Boost_INCLUDE_DIRS})
+
+target_link_libraries(onert_train nnfw_lib_tflite jsoncpp)
+target_link_libraries(onert_train nnfw-dev)
+target_link_libraries(onert_train ${Boost_PROGRAM_OPTIONS_LIBRARY})
+target_link_libraries(onert_train nnfw_lib_benchmark)
+
+install(TARGETS onert_train DESTINATION bin)
+
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
+# Unit Tests
+set(TEST_ONERT_TRAIN test_onert_train)
+
+file(GLOB_RECURSE ONERT_TRAIN_TEST_SRCS "test/*.cc")
+list(APPEND ONERT_TRAIN_TEST_SRCS "src/rawdataloader.cc")
+list(APPEND ONERT_TRAIN_TEST_SRCS "src/nnfw_util.cc")
+
+add_executable(${TEST_ONERT_TRAIN} ${ONERT_TRAIN_TEST_SRCS})
+
+target_link_libraries(${TEST_ONERT_TRAIN} nnfw-dev)
+target_link_libraries(${TEST_ONERT_TRAIN} gtest gtest_main dl ${LIB_PTHREAD})
+
+add_test(${TEST_ONERT_TRAIN} ${TEST_ONERT_TRAIN})
+install(TARGETS ${TEST_ONERT_TRAIN} DESTINATION unittest)
diff --git a/tests/tools/onert_train/README.md b/tests/tools/onert_train/README.md
new file mode 100644
index 000000000..a201237f6
--- /dev/null
+++ b/tests/tools/onert_train/README.md
@@ -0,0 +1,13 @@
+# onert_train
+
+`onert_train` aims to train AI models. It trains the model given by the user with the provided input and expected output data, and then stores or runs inference with the trained model.
+
+The input model formats supported by this tool are as follows:
+- circle
+- nnpackage
+
+## Usage
+
+### Simple train
+
+### Simple inference with the trained model
diff --git a/tests/tools/onert_train/src/allocation.h b/tests/tools/onert_train/src/allocation.h
new file mode 100644
index 000000000..f5a6aa73b
--- /dev/null
+++ b/tests/tools/onert_train/src/allocation.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_TRAIN_ALLOCATION_H__
+#define __ONERT_TRAIN_ALLOCATION_H__
+
+#include <cstdlib>
+#include <cstdint>
+
+namespace onert_train
+{
+class Allocation
+{
+public:
+  Allocation() : data_(nullptr) {}
+  ~Allocation() { free(data_); }
+  void *data() const { return data_; }
+  void *alloc(uint64_t sz)
+  {
+    if (data_)
+    {
+      free(data_);
+    }
+
+    return data_ = malloc(sz);
+  }
+
+private:
+  void *data_;
+};
+} // namespace onert_train
+
+#endif // __ONERT_TRAIN_ALLOCATION_H__
diff --git a/tests/tools/onert_train/src/args.cc b/tests/tools/onert_train/src/args.cc
new file mode 100644
index 000000000..dbdd384b5
--- /dev/null
+++ b/tests/tools/onert_train/src/args.cc
@@ -0,0 +1,291 @@
+/*
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "args.h" + +#include <functional> +#include <iostream> +#include <sys/stat.h> +#include <unistd.h> +#include <json/json.h> + +namespace +{ + +// This function parses a JSON array of alternating keys and values and returns it as a map +// For example, +// [0, [1, 2, 3, 4], 3, 40, 4, []] in JSON +// is converted to: +// { +// 0 -> [1, 2, 3, 4] +// 3 -> 40 +// 4 -> [] +// } in std::unordered_map. Note that the value type is still Json::Value. +std::unordered_map<uint32_t, Json::Value> argArrayToMap(const Json::Value &jsonval) +{ + if (!jsonval.isArray() || (jsonval.size() % 2 != 0)) + { + std::cerr << "JSON argument must be an even-sized array\n"; + exit(1); + } + + std::unordered_map<uint32_t, Json::Value> ret; + for (uint32_t i = 0; i < jsonval.size(); i += 2) + { + if (!jsonval[i].isUInt()) + { + std::cerr << "Key values (values at even indices) must be unsigned integers\n"; + exit(1); + } + uint32_t key = jsonval[i].asUInt(); + ret[key] = jsonval[i + 1]; + } + return ret; +} + +void checkModelfile(const std::string &model_filename) +{ + if (model_filename.empty()) + { + // TODO Print usage instead of the below message + std::cerr << "Please specify model file. Run with `--help` for usage." 
+ << "\n"; + + exit(1); + } + else + { + if (access(package_filename.c_str(), F_OK) == -1) + { + std::cerr << "nnpackage not found: " << package_filename << "\n"; + exit(1); + } + } +} + +} // namespace + +namespace onert_train +{ + +Args::Args(const int argc, char **argv) +{ + Initialize(); + Parse(argc, argv); +} + +void Args::Initialize(void) +{ + auto process_nnpackage = [&](const std::string &package_filename) { + _package_filename = package_filename; + + std::cerr << "Package Filename " << _package_filename << std::endl; + checkPackage(package_filename); + }; + + auto process_modelfile = [&](const std::string &model_filename) { + _model_filename = model_filename; + + std::cerr << "Model Filename " << _model_filename << std::endl; + checkModelfile(model_filename); + + _use_single_model = true; + }; + + auto process_path = [&](const std::string &path) { + struct stat sb; + if (stat(path.c_str(), &sb) == 0) + { + if (sb.st_mode & S_IFDIR) + { + _package_filename = path; + checkPackage(path); + std::cerr << "Package Filename " << path << std::endl; + } + else + { + _model_filename = path; + checkModelfile(path); + std::cerr << "Model Filename " << path << std::endl; + _use_single_model = true; + } + } + else + { + std::cerr << "Cannot find: " << path << "\n"; + exit(1); + } + }; + + auto process_load_raw_inputfile = [&](const std::string &input_filename) { + _load_raw_input_filename = input_filename; + + std::cerr << "Model Input Filename " << _load_raw_input_filename << std::endl; + checkModelfile(_load_raw_input_filename); + }; + + auto process_load_raw_expectedfile = [&](const std::string &expected_filename) { + _load_raw_expected_filename = expected_filename; + + std::cerr << "Model Expected Filename " << _load_raw_expected_filename << std::endl; + checkModelfile(_load_raw_expected_filename); + }; + + auto process_output_sizes = [&](const std::string &output_sizes_json_str) { + Json::Value root; + Json::Reader reader; + if (!reader.parse(output_sizes_json_str, root, false)) + { + std::cerr << "Invalid JSON format for output_sizes \"" << output_sizes_json_str << "\"\n"; + exit(1); + } + + auto arg_map = argArrayToMap(root); + for (auto &pair : arg_map) + { + uint32_t key = pair.first; + Json::Value &val_json = pair.second; + if (!val_json.isUInt()) + { + std::cerr << "All the values in `output_sizes` must be unsigned integers\n"; + exit(1); + } + uint32_t val = val_json.asUInt(); + _output_sizes[key] = val; + } + }; + + // General options + po::options_description general("General options", 100); + + // clang-format off + general.add_options() + ("help,h", "Print available options") + ("version", "Print version and exit immediately") + ("nnpackage", po::value<std::string>()->notifier(process_nnpackage), "NN Package file(directory) name") + ("modelfile", po::value<std::string>()->notifier(process_modelfile), "NN Model filename") + ("path", po::value<std::string>()->notifier(process_path), "NN Package or NN Modelfile path") + ("data_length", po::value<int>()->default_value(-1)->notifier([&](const auto &v) { _data_length = v; }), "Data length number") + ("load_input:raw", po::value<std::string>()->notifier(process_load_raw_inputfile), + "NN Model Raw Input data file\n" + "The datafile must have data for each input number.\n" + "If there are 3 inputs, the data of input0 must exist as much as data_length, " + "and the data for input1 and input2 must be held sequentially as data_length.\n" + ) + ("load_expected:raw", po::value<std::string>()->notifier(process_load_raw_expectedfile), + "NN 
Model Raw Expected data file\n" + "(Same data policy with load_input:raw)\n" + ) + ("mem_poll,m", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _mem_poll = v; }), "Check memory polling") + ("epoch", po::value<int>()->default_value(5)->notifier([&](const auto &v) { _epoch = v; }), "Epoch number (default: 5)") + ("batch_size", po::value<int>()->default_value(32)->notifier([&](const auto &v) { _batch_size = v; }), "Batch size (default: 32)") + ("learning_rate", po::value<float>()->default_value(1.0e-4)->notifier([&](const auto &v) { _learning_rate = v; }), "Learning rate (default: 1.0e-4)") + ("loss", po::value<int>()->default_value(0)->notifier([&] (const auto &v) { _loss_type = v; }), + "Loss type\n" + "0: MEAN_SQUARED_ERROR (default)\n" + "1: CATEGORICAL_CROSSENTROPY\n") + ("optimizer", po::value<int>()->default_value(0)->notifier([&] (const auto &v) { _optimizer_type = v; }), + "Optimizer type\n" + "0: SGD (default)\n" + "1: Adam\n") + ("verbose_level,v", po::value<int>()->default_value(0)->notifier([&](const auto &v) { _verbose_level = v; }), + "Verbose level\n" + "0: prints the only result. Messages btw run don't print\n" + "1: prints result and message btw run\n" + "2: prints all of messages to print\n") + ("output_sizes", po::value<std::string>()->notifier(process_output_sizes), + "The output buffer size in JSON 1D array\n" + "If not given, the model's output sizes are used\n" + "e.g. '[0, 40, 2, 80]' to set 0th tensor to 40 and 2nd tensor to 80.\n") + ; + // clang-format on + + _options.add(general); + _positional.add("path", -1); +} + +void Args::Parse(const int argc, char **argv) +{ + po::variables_map vm; + po::store(po::command_line_parser(argc, argv).options(_options).positional(_positional).run(), + vm); + + if (vm.count("help")) + { + std::cout << "onert_train\n\n"; + std::cout << "Usage: " << argv[0] << "[model path] [<options>]\n\n"; + std::cout << _options; + std::cout << "\n"; + + exit(0); + } + + if (vm.count("version")) + { + _print_version = true; + return; + } + + { + auto conflicting_options = [&](const std::string &o1, const std::string &o2) { + if ((vm.count(o1) && !vm[o1].defaulted()) && (vm.count(o2) && !vm[o2].defaulted())) + { + throw boost::program_options::error(std::string("Two options '") + o1 + "' and '" + o2 + + "' cannot be given at once."); + } + }; + + // Cannot use both single model file and nnpackage at once + conflicting_options("modelfile", "nnpackage"); + + // Require modelfile, nnpackage, or path + if (!vm.count("modelfile") && !vm.count("nnpackage") && !vm.count("path")) + throw boost::program_options::error( + std::string("Require one of options modelfile, nnpackage, or path.")); + } + + try + { + po::notify(vm); + } + catch (const std::bad_cast &e) + { + std::cerr << "Bad cast error - " << e.what() << '\n'; + exit(1); + } +} + +} // end of namespace onert_train diff --git a/tests/tools/onert_train/src/args.h b/tests/tools/onert_train/src/args.h new file mode 100644 index 000000000..cbd87e111 --- /dev/null +++ b/tests/tools/onert_train/src/args.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_TRAIN_ARGS_H__ +#define __ONERT_TRAIN_ARGS_H__ + +#include <string> +#include <unordered_map> +#include <vector> +#include <boost/program_options.hpp> + +#include "types.h" + +namespace po = boost::program_options; + +namespace onert_train +{ + +using TensorShapeMap = std::unordered_map<uint32_t, TensorShape>; + +#if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1 +enum class WhenToUseH5Shape +{ + NOT_PROVIDED, // Param not provided + PREPARE, // read shapes in h5 file and set them as inputs' shape before calling nnfw_prepare() + RUN, // read shapes in h5 file and set them as inputs' shape before calling nnfw_run() +}; +#endif + +class Args +{ +public: + Args(const int argc, char **argv); + void print(void); + + const std::string &getPackageFilename(void) const { return _package_filename; } + const std::string &getModelFilename(void) const { return _model_filename; } + const bool useSingleModel(void) const { return _use_single_model; } + const int getDataLength(void) const { return _data_length; } + const std::string &getLoadRawInputFilename(void) const { return _load_raw_input_filename; } + const std::string &getLoadRawExpectedFilename(void) const { return _load_raw_expected_filename; } + const bool getMemoryPoll(void) const { return _mem_poll; } + const int getEpoch(void) const { return _epoch; } + const int getBatchSize(void) const { return _batch_size; } + const float getLearningRate(void) const { return _learning_rate; } + const int getLossType(void) const { return _loss_type; } + const int getOptimizerType(void) const { return _optimizer_type; } + const bool printVersion(void) const { return _print_version; } + const int getVerboseLevel(void) const { return _verbose_level; } + std::unordered_map<uint32_t, uint32_t> getOutputSizes(void) const { return _output_sizes; } + +private: + void Initialize(); + void Parse(const int argc, char **argv); + +private: + po::positional_options_description _positional; + po::options_description _options; + + std::string _package_filename; + std::string _model_filename; + bool _use_single_model = false; + int _data_length; + std::string _load_raw_input_filename; + std::string _load_raw_expected_filename; + bool _mem_poll; + int _epoch; + int _batch_size; + float _learning_rate; + int _loss_type; + int _optimizer_type; + bool _print_version = false; + int _verbose_level; + std::unordered_map<uint32_t, uint32_t> _output_sizes; +}; + +} // end of namespace onert_train + +#endif // __ONERT_TRAIN_ARGS_H__ diff --git a/tests/tools/onert_train/src/formatter.h b/tests/tools/onert_train/src/formatter.h new file mode 100644 index 000000000..6d256804e --- /dev/null +++ b/tests/tools/onert_train/src/formatter.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_TRAIN_FORMATTER_H__ +#define __ONERT_TRAIN_FORMATTER_H__ + +#include <string> +#include <vector> + +#include "types.h" +#include "allocation.h" + +struct nnfw_session; + +namespace onert_train +{ +class Formatter +{ +public: + virtual ~Formatter() = default; + Formatter(nnfw_session *sess) : session_(sess) {} + virtual void loadInputs(const std::string &filename, std::vector<Allocation> &inputs) = 0; + virtual void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) = 0; + virtual std::vector<TensorShape> readTensorShapes(const std::string &filename) + { + return std::vector<TensorShape>(); + }; + +protected: + nnfw_session *session_; +}; +} // namespace onert_train + +#endif // __ONERT_TRAIN_FORMATTER_H__ diff --git a/tests/tools/onert_train/src/h5formatter.cc b/tests/tools/onert_train/src/h5formatter.cc new file mode 100644 index 000000000..12c570b5d --- /dev/null +++ b/tests/tools/onert_train/src/h5formatter.cc @@ -0,0 +1,258 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "h5formatter.h" +#include "nnfw.h" +#include "nnfw_util.h" + +#include <iostream> +#include <stdexcept> +#include <H5Cpp.h> + +namespace +{ +onert_train::TensorShape getShape(H5::DataSet &data_set) +{ + std::vector<hsize_t> h5_shape; // hsize_t is unsigned long long + H5::DataSpace data_space = data_set.getSpace(); + int rank = data_space.getSimpleExtentNdims(); + h5_shape.resize(rank); + + // read shape info from H5 file + data_space.getSimpleExtentDims(h5_shape.data(), NULL); + + onert_train::TensorShape shape; + for (auto dim : h5_shape) + shape.emplace_back(static_cast<int>(dim)); + + return shape; +} +} // namespace + +namespace onert_train +{ +static const char *h5_value_grpname = "value"; + +std::vector<TensorShape> H5Formatter::readTensorShapes(const std::string &filename) +{ + uint32_t num_inputs; + NNPR_ENSURE_STATUS(nnfw_input_size(session_, &num_inputs)); + std::vector<TensorShape> tensor_shapes; + + try + { + H5::Exception::dontPrint(); + + H5::H5File file(filename, H5F_ACC_RDONLY); + H5::Group value_group = file.openGroup(h5_value_grpname); + + // Constraints: if there are n data set names, they should be unique and + // one of [ "0", "1", .. 
, "n-1" ] + for (uint32_t i = 0; i < num_inputs; ++i) + { + H5::DataSet data_set = value_group.openDataSet(std::to_string(i)); + H5::DataType type = data_set.getDataType(); + auto shape = getShape(data_set); + + tensor_shapes.emplace_back(shape); + } + + return tensor_shapes; + } + catch (const H5::Exception &e) + { + H5::Exception::printErrorStack(); + std::exit(-1); + } + catch (const std::exception &e) + { + std::cerr << e.what() << std::endl; + std::exit(-1); + } +} + +void H5Formatter::loadInputs(const std::string &filename, std::vector<Allocation> &inputs) +{ + uint32_t num_inputs; + NNPR_ENSURE_STATUS(nnfw_input_size(session_, &num_inputs)); + try + { + // Turn off the automatic error printing. + H5::Exception::dontPrint(); + + H5::H5File file(filename, H5F_ACC_RDONLY); + H5::Group value_group = file.openGroup(h5_value_grpname); + for (uint32_t i = 0; i < num_inputs; ++i) + { + nnfw_tensorinfo ti; + NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session_, i, &ti)); + + // TODO Add Assert(nnfw shape, h5 file shape size) + + // allocate memory for data + auto bufsz = bufsize_for(&ti); + inputs[i].alloc(bufsz); + + H5::DataSet data_set = value_group.openDataSet(std::to_string(i)); + H5::DataType type = data_set.getDataType(); + switch (ti.dtype) + { + case NNFW_TYPE_TENSOR_FLOAT32: + if (type == H5::PredType::IEEE_F32BE || type == H5::PredType::IEEE_F32LE) + data_set.read(inputs[i].data(), H5::PredType::NATIVE_FLOAT); + else + throw std::runtime_error("model input type is f32. But h5 data type is different."); + break; + case NNFW_TYPE_TENSOR_INT32: + if (type == H5::PredType::STD_I32BE || type == H5::PredType::STD_I32LE) + data_set.read(inputs[i].data(), H5::PredType::NATIVE_INT32); + else + throw std::runtime_error("model input type is i32. But h5 data type is different."); + break; + case NNFW_TYPE_TENSOR_INT64: + if (type == H5::PredType::STD_I64BE || type == H5::PredType::STD_I64LE) + data_set.read(inputs[i].data(), H5::PredType::NATIVE_INT64); + else + throw std::runtime_error("model input type is i64. But h5 data type is different."); + break; + case NNFW_TYPE_TENSOR_QUANT8_ASYMM: + case NNFW_TYPE_TENSOR_BOOL: + case NNFW_TYPE_TENSOR_UINT8: + if (type == H5::PredType::STD_U8BE || type == H5::PredType::STD_U8LE) + data_set.read(inputs[i].data(), H5::PredType::NATIVE_UINT8); + else + throw std::runtime_error( + "model input type is qasymm8, bool or uint8. But h5 data type is different."); + break; + case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED: + if (type == H5::PredType::STD_I8BE || type == H5::PredType::STD_I8LE) + data_set.read(inputs[i].data(), H5::PredType::NATIVE_INT8); + else + throw std::runtime_error("model input type is int8. 
But h5 data type is different."); + break; + case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED: + throw std::runtime_error("NYI for NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED type"); + default: + throw std::runtime_error("onert_train can load f32, i32, qasymm8, bool and uint8."); + } + NNPR_ENSURE_STATUS(nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), bufsz)); + NNPR_ENSURE_STATUS(nnfw_set_input_layout(session_, i, NNFW_LAYOUT_CHANNELS_LAST)); + } + } + catch (const H5::Exception &e) + { + H5::Exception::printErrorStack(); + std::exit(-1); + } + catch (const std::exception &e) + { + std::cerr << e.what() << std::endl; + std::exit(-1); + } +}; + +void H5Formatter::dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) +{ + uint32_t num_outputs; + NNPR_ENSURE_STATUS(nnfw_output_size(session_, &num_outputs)); + try + { + // Turn off the automatic error printing. + H5::Exception::dontPrint(); + + H5::H5File file(filename, H5F_ACC_TRUNC); + H5::Group value_group = file.createGroup(h5_value_grpname); + for (uint32_t i = 0; i < num_outputs; i++) + { + nnfw_tensorinfo ti; + NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session_, i, &ti)); + std::vector<hsize_t> dims(ti.rank); + for (uint32_t j = 0; j < ti.rank; ++j) + { + if (ti.dims[j] >= 0) + dims[j] = static_cast<hsize_t>(ti.dims[j]); + else + { + std::cerr << "Negative dimension in output tensor" << std::endl; + exit(-1); + } + } + H5::DataSpace data_space(ti.rank, dims.data()); + switch (ti.dtype) + { + case NNFW_TYPE_TENSOR_FLOAT32: + { + H5::DataSet data_set = + value_group.createDataSet(std::to_string(i), H5::PredType::IEEE_F32BE, data_space); + data_set.write(outputs[i].data(), H5::PredType::NATIVE_FLOAT); + break; + } + case NNFW_TYPE_TENSOR_INT32: + { + H5::DataSet data_set = + value_group.createDataSet(std::to_string(i), H5::PredType::STD_I32LE, data_space); + data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT32); + break; + } + case NNFW_TYPE_TENSOR_INT64: + { + H5::DataSet data_set = + value_group.createDataSet(std::to_string(i), H5::PredType::STD_I64LE, data_space); + data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT64); + break; + } + case NNFW_TYPE_TENSOR_UINT8: + case NNFW_TYPE_TENSOR_QUANT8_ASYMM: + { + H5::DataSet data_set = + value_group.createDataSet(std::to_string(i), H5::PredType::STD_U8BE, data_space); + data_set.write(outputs[i].data(), H5::PredType::NATIVE_UINT8); + break; + } + case NNFW_TYPE_TENSOR_BOOL: + { + H5::DataSet data_set = + value_group.createDataSet(std::to_string(i), H5::PredType::STD_U8LE, data_space); + data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT8); + break; + } + case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED: + { + H5::DataSet data_set = + value_group.createDataSet(std::to_string(i), H5::PredType::STD_I8LE, data_space); + data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT8); + break; + } + case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED: + throw std::runtime_error("NYI for NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED type"); + default: + throw std::runtime_error("onert_train can dump f32, i32, qasymm8, bool and uint8."); + } + } + } + catch (const H5::Exception &e) + { + H5::Exception::printErrorStack(); + std::exit(-1); + } + catch (const std::runtime_error &e) + { + std::cerr << "Error during dumpOutputs on onert_train : " << e.what() << std::endl; + std::exit(-1); + } +}; + +} // end of namespace onert_train
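For reference, `H5Formatter::loadInputs` above expects an HDF5 file with a group named `value` whose datasets are named `0` … `n-1`, one per model input. A minimal sketch of producing a compatible file with the H5Cpp API used above; the file name, rank, shape, and float payload are illustrative placeholders only:

```
#include <H5Cpp.h>
#include <vector>

int main()
{
  // "input.h5" is a placeholder name; the group must be called "value".
  H5::H5File file("input.h5", H5F_ACC_TRUNC);
  H5::Group value_group = file.createGroup("value");

  // One dataset per model input, named "0", "1", ... "n-1".
  // The shape must match the corresponding model input shape.
  hsize_t dims[4] = {1, 2, 2, 2};
  H5::DataSpace space(4, dims);
  std::vector<float> input0(1 * 2 * 2 * 2, 0.5f); // dummy float32 payload
  H5::DataSet ds = value_group.createDataSet("0", H5::PredType::IEEE_F32LE, space);
  ds.write(input0.data(), H5::PredType::NATIVE_FLOAT);
  return 0;
}
```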
diff --git a/tests/tools/onert_train/src/h5formatter.h b/tests/tools/onert_train/src/h5formatter.h new file mode 100644 index 000000000..21ef16526 --- /dev/null +++ b/tests/tools/onert_train/src/h5formatter.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_TRAIN_H5FORMATTER_H__ +#define __ONERT_TRAIN_H5FORMATTER_H__ + +#include "allocation.h" +#include "formatter.h" +#include "types.h" + +#include <string> +#include <vector> + +struct nnfw_session; + +namespace onert_train +{ +class H5Formatter : public Formatter +{ +public: + H5Formatter(nnfw_session *sess) : Formatter(sess) {} + std::vector<TensorShape> readTensorShapes(const std::string &filename) override; + void loadInputs(const std::string &filename, std::vector<Allocation> &inputs) override; + void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) override; +}; +} // namespace onert_train + +#endif // __ONERT_TRAIN_H5FORMATTER_H__ diff --git a/tests/tools/onert_train/src/measure.h b/tests/tools/onert_train/src/measure.h new file mode 100644 index 000000000..f7c8610d0 --- /dev/null +++ b/tests/tools/onert_train/src/measure.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_TRAIN_MEASURE_H__ +#define __ONERT_TRAIN_MEASURE_H__ + +#include <algorithm> +#include <cstdint> +#include <ctime> +#include <functional> +#include <stdexcept> +#include <vector> + +namespace +{ +uint64_t nowMicros() +{ + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return static_cast<uint64_t>(ts.tv_nsec) / 1e3 + static_cast<uint64_t>(ts.tv_sec) * 1e6; +} +} // namespace + +namespace onert_train +{ + +struct Step +{ + uint64_t time; // us + // TODO Support memory usage +}; + +class Measure +{ +public: + Measure() = default; + + void set(const int epoch, const int step) + { + _results.clear(); + _results.resize(epoch); + std::for_each(_results.begin(), _results.end(), [step](auto &v) { v.resize(step); }); + } + + void run(const int epoch, const int step, const std::function<void()> &func) + { + if (_results.empty() || _results.size() <= epoch || _results[epoch].size() <= step) + { + throw std::runtime_error("Please set the number of epochs and steps first"); + } + + _results[epoch][step].time = nowMicros(); + + func(); + + _results[epoch][step].time = nowMicros() - _results[epoch][step].time; + } + + double timeMicros(const int epoch) + { + if (_results.empty() || _results.size() <= epoch) + { + throw std::runtime_error("Invalid epoch"); + } + + double sum = 0.0; + std::for_each(_results[epoch].begin(), _results[epoch].end(), + [&sum](auto &v) { sum += v.time; }); + return sum / _results[epoch].size(); + } + + double timeMs(const int epoch) { return timeMicros(epoch) / 1e3; } + +private: + std::vector<std::vector<Step>> _results; +}; + +} // namespace onert_train + +#endif // __ONERT_TRAIN_MEASURE_H__
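The `Measure` helper above is driven in three steps: `set()` sizes the per-epoch result table, `run()` times one callback, and `timeMs()` averages an epoch. A minimal usage sketch mirroring how `onert_train.cc` drives it; the epoch/step counts are illustrative:

```
#include "measure.h"
#include <iostream>

int main()
{
  onert_train::Measure measure;
  const int num_epoch = 2, num_step = 3; // placeholder sizes
  measure.set(num_epoch, num_step);      // must be called before run()

  for (int e = 0; e < num_epoch; ++e)
    for (int s = 0; s < num_step; ++s)
      measure.run(e, s, [] { /* one training step would go here */ });

  std::cout << measure.timeMs(0) << " ms/step in epoch 0\n";
  return 0;
}
```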
diff --git a/tests/tools/onert_train/src/nnfw_util.cc b/tests/tools/onert_train/src/nnfw_util.cc new file mode 100644 index 000000000..8dd2aa871 --- /dev/null +++ b/tests/tools/onert_train/src/nnfw_util.cc @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cassert> +#include <string> +#include "nnfw.h" + +namespace onert_train +{ +uint64_t num_elems(const nnfw_tensorinfo *ti) +{ + uint64_t n = 1; + for (uint32_t i = 0; i < ti->rank; ++i) + { + assert(ti->dims[i] >= 0); + n *= ti->dims[i]; + } + return n; +} + +uint64_t bufsize_for(const nnfw_tensorinfo *ti) +{ + static int elmsize[] = { + sizeof(float), /* NNFW_TYPE_TENSOR_FLOAT32 */ + sizeof(int), /* NNFW_TYPE_TENSOR_INT32 */ + sizeof(uint8_t), /* NNFW_TYPE_TENSOR_QUANT8_ASYMM */ + sizeof(bool), /* NNFW_TYPE_TENSOR_BOOL = 3 */ + sizeof(uint8_t), /* NNFW_TYPE_TENSOR_UINT8 = 4 */ + sizeof(int64_t), /* NNFW_TYPE_TENSOR_INT64 = 5 */ + sizeof(int8_t), /* NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED = 6 */ + sizeof(int16_t), /* NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED = 7 */ + }; + return elmsize[ti->dtype] * num_elems(ti); +} + +} // namespace onert_train diff --git a/tests/tools/onert_train/src/nnfw_util.h b/tests/tools/onert_train/src/nnfw_util.h new file mode 100644 index 000000000..674e18fb2 --- /dev/null +++ b/tests/tools/onert_train/src/nnfw_util.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_TRAIN_NNFW_UTIL_H__ +#define __ONERT_TRAIN_NNFW_UTIL_H__ + +#include "nnfw.h" + +#define NNPR_ENSURE_STATUS(a) \ + do \ + { \ + if ((a) != NNFW_STATUS_NO_ERROR) \ + { \ + exit(-1); \ + } \ + } while (0) + +namespace onert_train +{ +uint64_t num_elems(const nnfw_tensorinfo *ti); +uint64_t bufsize_for(const nnfw_tensorinfo *ti); +} // end of namespace onert_train + +#endif // __ONERT_TRAIN_NNFW_UTIL_H__ diff --git a/tests/tools/onert_train/src/onert_train.cc b/tests/tools/onert_train/src/onert_train.cc new file mode 100644 index 000000000..678d13fc9 --- /dev/null +++ b/tests/tools/onert_train/src/onert_train.cc @@ -0,0 +1,277 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "allocation.h" +#include "args.h" +#include "benchmark.h" +#include "measure.h" +#include "nnfw.h" +#include "nnfw_util.h" +#include "nnfw_internal.h" +#include "nnfw_experimental.h" +#include "randomgen.h" +#include "rawformatter.h" +#include "rawdataloader.h" + +#include <boost/program_options.hpp> +#include <cassert> +#include <chrono> +#include <cstdlib> +#include <iostream> +#include <libgen.h> +#include <stdexcept> +#include <unordered_map> +#include <vector> + +static const char *default_backend_cand = "train"; + +int main(const int argc, char **argv) +{ + using namespace onert_train; + + try + { + Args args(argc, argv); + if (args.printVersion()) + { + uint32_t version; + NNPR_ENSURE_STATUS(nnfw_query_info_u32(NULL, NNFW_INFO_ID_VERSION, &version)); + std::cout << "onert_train (nnfw runtime: v" << (version >> 24) << "." + << ((version & 0x0000FF00) >> 8) << "." << (version & 0xFF) << ")" << std::endl; + exit(0); + } + + // TODO Apply verbose level to phases + const int verbose = args.getVerboseLevel(); + benchmark::Phases phases(benchmark::PhaseOption{}); + + nnfw_session *session = nullptr; + NNPR_ENSURE_STATUS(nnfw_create_session(&session)); + + // ModelLoad + phases.run("MODEL_LOAD", [&](const benchmark::Phase &, uint32_t) { + if (args.useSingleModel()) + NNPR_ENSURE_STATUS( + nnfw_load_model_from_modelfile(session, args.getModelFilename().c_str())); + else + NNPR_ENSURE_STATUS(nnfw_load_model_from_file(session, args.getPackageFilename().c_str())); + }); + + // Set training backend + NNPR_ENSURE_STATUS(nnfw_set_available_backends(session, default_backend_cand)); + + uint32_t num_inputs; + NNPR_ENSURE_STATUS(nnfw_input_size(session, &num_inputs)); + + uint32_t num_expecteds; + NNPR_ENSURE_STATUS(nnfw_output_size(session, &num_expecteds)); + + // verify input and output + + auto verifyInputTypes = [session]() { + uint32_t sz; + NNPR_ENSURE_STATUS(nnfw_input_size(session, &sz)); + for (uint32_t i = 0; i < sz; ++i) + { + nnfw_tensorinfo ti; + NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, i, &ti)); + + if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED) + { + std::cerr << "E: not supported input type" << std::endl; + exit(-1); + } + } + }; + + auto verifyOutputTypes = [session]() { + uint32_t sz; + NNPR_ENSURE_STATUS(nnfw_output_size(session, &sz)); + + for (uint32_t i = 0; i < sz; ++i) + { + nnfw_tensorinfo ti; + NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti)); + + if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED) + { + std::cerr << "E: not supported output type" << std::endl; + exit(-1); + } + } + }; + + verifyInputTypes(); + verifyOutputTypes(); + + auto convertLossType = [](int type) { + switch (type) + { + case 0: + return NNFW_TRAIN_LOSS_MEAN_SQUARED_ERROR; + case 1: + return NNFW_TRAIN_LOSS_CATEGORICAL_CROSSENTROPY; + default: + std::cerr << "E: not supported loss type" << std::endl; + exit(-1); + } + }; + + auto convertOptType = [](int type) { + switch (type) + { + case 0: + return NNFW_TRAIN_OPTIMIZER_SGD; + case 1: + return NNFW_TRAIN_OPTIMIZER_ADAM; + default: + std::cerr << "E: not supported optimizer type" << std::endl; + exit(-1); + } + }; + + // prepare training info + nnfw_train_info tri; + tri.batch_size = args.getBatchSize(); + tri.learning_rate = args.getLearningRate(); + tri.loss = convertLossType(args.getLossType()); + tri.opt = convertOptType(args.getOptimizerType()); + + // prepare execution + + // TODO When nnfw_{prepare|run} are failed, can't 
catch the time + phases.run("PREPARE", [&](const benchmark::Phase &, uint32_t) { + NNPR_ENSURE_STATUS(nnfw_train_prepare(session, &tri)); + }); + + // prepare input and expected tensor info lists + std::vector<nnfw_tensorinfo> input_infos; + std::vector<nnfw_tensorinfo> expected_infos; + + // prepare data buffers + std::vector<Allocation> input_data(num_inputs); + std::vector<Allocation> expected_data(num_expecteds); + + for (uint32_t i = 0; i < num_inputs; ++i) + { + nnfw_tensorinfo ti; + NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, i, &ti)); + input_data[i].alloc(bufsize_for(&ti)); + input_infos.emplace_back(std::move(ti)); + } + + for (uint32_t i = 0; i < num_expecteds; ++i) + { + nnfw_tensorinfo ti; + NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti)); + expected_data[i].alloc(bufsize_for(&ti)); + expected_infos.emplace_back(std::move(ti)); + } + + auto data_length = args.getDataLength(); + + Generator generator; + RawDataLoader rawDataLoader; + + if (!args.getLoadRawInputFilename().empty() && !args.getLoadRawExpectedFilename().empty()) + { + generator = + rawDataLoader.loadData(args.getLoadRawInputFilename(), args.getLoadRawExpectedFilename(), + input_infos, expected_infos, data_length, tri.batch_size); + } + else + { + // TODO Use random generator + std::cerr << "E: not supported random input and expected generator" << std::endl; + exit(-1); + } + + Measure measure; + std::vector<float> losses(num_expecteds); + phases.run("EXECUTE", [&](const benchmark::Phase &, uint32_t) { + const int num_step = data_length / tri.batch_size; + const int num_epoch = args.getEpoch(); + measure.set(num_epoch, num_step); + for (uint32_t epoch = 0; epoch < num_epoch; ++epoch) + { + std::fill(losses.begin(), losses.end(), 0); + for (uint32_t n = 0; n < num_step; ++n) + { + // get batchsize data + if (!generator(n, input_data, expected_data)) + break; + + // prepare input + for (uint32_t i = 0; i < num_inputs; ++i) + { + NNPR_ENSURE_STATUS( + nnfw_train_set_input(session, i, input_data[i].data(), &input_infos[i])); + } + + // prepare output + for (uint32_t i = 0; i < num_expecteds; ++i) + { + NNPR_ENSURE_STATUS( + nnfw_train_set_expected(session, i, expected_data[i].data(), &expected_infos[i])); + } + + // train + measure.run(epoch, n, [&]() { NNPR_ENSURE_STATUS(nnfw_train(session, true)); }); + + // store loss + for (int32_t i = 0; i < num_expecteds; ++i) + { + float temp = 0.f; + NNPR_ENSURE_STATUS(nnfw_train_get_loss(session, i, &temp)); + losses[i] += temp; + } + } + + // print loss + std::cout << std::fixed; + std::cout.precision(3); + std::cout << "Epoch " << epoch + 1 << "/" << num_epoch << " - " << measure.timeMs(epoch) + << "ms/step - loss: "; + std::cout.precision(4); + for (uint32_t i = 0; i < num_expecteds; ++i) + { + std::cout << "[" << i << "] " << losses[i] / num_step; + } + std::cout /* << "- accuracy: " << accuracy*/ << std::endl; + } + }); + + NNPR_ENSURE_STATUS(nnfw_close_session(session)); + + // prepare result + benchmark::Result result(phases); + + // to stdout + benchmark::printResult(result); + + return 0; + } + catch (boost::program_options::error &e) + { + std::cerr << "E: " << e.what() << std::endl; + exit(-1); + } + catch (std::runtime_error &e) + { + std::cerr << "E: Fail to run by runtime error:" << e.what() << std::endl; + exit(-1); + } +} diff --git a/tests/tools/onert_train/src/randomgen.cc b/tests/tools/onert_train/src/randomgen.cc new file mode 100644 index 000000000..72599cbb2 --- /dev/null +++ b/tests/tools/onert_train/src/randomgen.cc @@ -0,0 +1,77 @@ +/* 
+ * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "randomgen.h" +#include "nnfw.h" +#include "nnfw_util.h" +#include "misc/RandomGenerator.h" + +#include <iostream> + +namespace onert_train +{ + +template <class T> void randomData(nnfw::misc::RandomGenerator &randgen, void *data, uint64_t size) +{ + for (uint64_t i = 0; i < size; i++) + reinterpret_cast<T *>(data)[i] = randgen.generate<T>(); +} + +void RandomGenerator::generate(std::vector<Allocation> &inputs) +{ + // generate random data + const int seed = 1; + nnfw::misc::RandomGenerator randgen{seed, 0.0f, 2.0f}; + for (uint32_t i = 0; i < inputs.size(); ++i) + { + nnfw_tensorinfo ti; + NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session_, i, &ti)); + auto input_size_in_bytes = bufsize_for(&ti); + inputs[i].alloc(input_size_in_bytes); + switch (ti.dtype) + { + case NNFW_TYPE_TENSOR_FLOAT32: + randomData<float>(randgen, inputs[i].data(), num_elems(&ti)); + break; + case NNFW_TYPE_TENSOR_QUANT8_ASYMM: + randomData<uint8_t>(randgen, inputs[i].data(), num_elems(&ti)); + break; + case NNFW_TYPE_TENSOR_BOOL: + randomData<bool>(randgen, inputs[i].data(), num_elems(&ti)); + break; + case NNFW_TYPE_TENSOR_UINT8: + randomData<uint8_t>(randgen, inputs[i].data(), num_elems(&ti)); + break; + case NNFW_TYPE_TENSOR_INT32: + randomData<int32_t>(randgen, inputs[i].data(), num_elems(&ti)); + break; + case NNFW_TYPE_TENSOR_INT64: + randomData<int64_t>(randgen, inputs[i].data(), num_elems(&ti)); + break; + case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED: + randomData<int16_t>(randgen, inputs[i].data(), num_elems(&ti)); + break; + default: + std::cerr << "Not supported input type" << std::endl; + std::exit(-1); + } + NNPR_ENSURE_STATUS( + nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), input_size_in_bytes)); + NNPR_ENSURE_STATUS(nnfw_set_input_layout(session_, i, NNFW_LAYOUT_CHANNELS_LAST)); + } +}; + +} // end of namespace onert_train diff --git a/tests/tools/onert_train/src/randomgen.h b/tests/tools/onert_train/src/randomgen.h new file mode 100644 index 000000000..410c66d6f --- /dev/null +++ b/tests/tools/onert_train/src/randomgen.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_TRAIN_RANDOMGEN_H__ +#define __ONERT_TRAIN_RANDOMGEN_H__ + +#include <string> +#include <vector> + +#include "allocation.h" + +struct nnfw_session; + +namespace onert_train +{ +class RandomGenerator +{ +public: + RandomGenerator(nnfw_session *sess) : session_(sess) {} + void generate(std::vector<Allocation> &inputs); + +private: + nnfw_session *session_; +}; +} // namespace onert_train + +#endif // __ONERT_TRAIN_RANDOMGEN_H__ diff --git a/tests/tools/onert_train/src/rawdataloader.cc b/tests/tools/onert_train/src/rawdataloader.cc new file mode 100644 index 000000000..a3672a9f3 --- /dev/null +++ b/tests/tools/onert_train/src/rawdataloader.cc @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "rawdataloader.h" +#include "nnfw_util.h" + +#include <iostream> +#include <stdexcept> +#include <numeric> + +namespace onert_train +{ + +Generator RawDataLoader::loadData(const std::string &input_file, const std::string &expected_file, + const std::vector<nnfw_tensorinfo> &input_infos, + const std::vector<nnfw_tensorinfo> &expected_infos, + const uint32_t data_length, const uint32_t batch_size) +{ + std::vector<uint32_t> input_origins(input_infos.size()); + uint32_t start = 0; + for (uint32_t i = 0; i < input_infos.size(); ++i) + { + input_origins.at(i) = start; + start += (bufsize_for(&input_infos[i]) / batch_size * data_length); + } + + std::vector<uint32_t> expected_origins(expected_infos.size()); + start = 0; + for (uint32_t i = 0; i < expected_infos.size(); ++i) + { + expected_origins.at(i) = start; + start += (bufsize_for(&expected_infos[i]) / batch_size * data_length); + } + + try + { + _input_file = std::ifstream(input_file, std::ios::ate | std::ios::binary); + _expected_file = std::ifstream(expected_file, std::ios::ate | std::ios::binary); + } + catch (const std::exception &e) + { + std::cerr << e.what() << std::endl; + std::exit(-1); + } + + return [input_origins, expected_origins, &input_infos, &expected_infos, + this](uint32_t idx, std::vector<Allocation> &inputs, std::vector<Allocation> &expecteds) { + for (uint32_t i = 0; i < input_infos.size(); ++i) + { + auto bufsz = bufsize_for(&input_infos[i]); + _input_file.seekg(input_origins[i] + idx * bufsz, std::ios::beg); + _input_file.read(reinterpret_cast<char *>(inputs[i].data()), bufsz); + } + for (uint32_t i = 0; i < expected_infos.size(); ++i) + { + auto bufsz = bufsize_for(&expected_infos[i]); + _expected_file.seekg(expected_origins[i] + idx * bufsz, std::ios::beg); + _expected_file.read(reinterpret_cast<char *>(expecteds[i].data()), bufsz); + } + return true; + }; +} + +} // namespace onert_train diff --git a/tests/tools/onert_train/src/rawdataloader.h b/tests/tools/onert_train/src/rawdataloader.h new file mode 100644 index 000000000..3fb292770 --- /dev/null +++ b/tests/tools/onert_train/src/rawdataloader.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_TRAIN_RAWDATALOADER_H__ +#define __ONERT_TRAIN_RAWDATALOADER_H__ + +#include "allocation.h" +#include "nnfw.h" + +#include <functional> +#include <string> +#include <vector> +#include <fstream> + +namespace onert_train +{ + +using Generator = std::function<bool(uint32_t, /** index **/ + std::vector<Allocation> &, /** input **/ + std::vector<Allocation> & /** expected **/)>; + +class RawDataLoader +{ +public: + RawDataLoader() = default; + Generator loadData(const std::string &input_file, const std::string &expected_file, + const std::vector<nnfw_tensorinfo> &input_infos, + const std::vector<nnfw_tensorinfo> &expected_infos, const uint32_t data_length, + const uint32_t batch_size); + +private: + std::ifstream _input_file; + std::ifstream _expected_file; +}; + +} // namespace onert_train + +#endif // __ONERT_TRAIN_RAWDATALOADER_H__
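The `Generator` returned by `RawDataLoader::loadData` is called once per training step to refill preallocated buffers from the raw files, which is how the main loop in `onert_train.cc` drives it. A minimal sketch of that calling pattern; the file names, data length, batch size, and tensor-info vectors are illustrative placeholders:

```
#include "rawdataloader.h"
#include "nnfw_util.h"

void example(const std::vector<nnfw_tensorinfo> &input_infos,
             const std::vector<nnfw_tensorinfo> &expected_infos)
{
  const uint32_t data_length = 100; // placeholder values
  const uint32_t batch_size = 16;

  // The loader must outlive the generator, which reads from its open streams.
  onert_train::RawDataLoader loader;
  auto gen = loader.loadData("input.bin", "expected.bin", input_infos, expected_infos,
                             data_length, batch_size);

  // Buffers are allocated once per tensor; the generator refills them each step.
  std::vector<onert_train::Allocation> inputs(input_infos.size());
  std::vector<onert_train::Allocation> expecteds(expected_infos.size());
  for (size_t i = 0; i < input_infos.size(); ++i)
    inputs[i].alloc(onert_train::bufsize_for(&input_infos[i]));
  for (size_t i = 0; i < expected_infos.size(); ++i)
    expecteds[i].alloc(onert_train::bufsize_for(&expected_infos[i]));

  for (uint32_t n = 0; n < data_length / batch_size; ++n)
  {
    if (!gen(n, inputs, expecteds)) // reads one batch for every tensor
      break;
    // ... pass the buffers to nnfw_train_set_input / nnfw_train_set_expected ...
  }
}
```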
diff --git a/tests/tools/onert_train/src/rawformatter.cc b/tests/tools/onert_train/src/rawformatter.cc new file mode 100644 index 000000000..a17071684 --- /dev/null +++ b/tests/tools/onert_train/src/rawformatter.cc @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "rawformatter.h" +#include "nnfw.h" +#include "nnfw_util.h" + +#include <iostream> +#include <fstream> +#include <stdexcept> + +namespace onert_train +{ +void RawFormatter::loadInputs(const std::string &filename, std::vector<Allocation> &inputs) +{ + uint32_t num_inputs; + NNPR_ENSURE_STATUS(nnfw_input_size(session_, &num_inputs)); + + // Support multiple inputs + // Option 1: Get comma-separated input file list like --load:raw a,b,c + // Option 2: Get prefix --load:raw in + // Internally access in.0, in.1, in.2, ... in.{N-1} where N is determined by nnfw info + // query api. + // + // Currently Option 2 is implemented. + try + { + for (uint32_t i = 0; i < num_inputs; ++i) + { + nnfw_tensorinfo ti; + NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session_, i, &ti)); + + // allocate memory for data + auto bufsz = bufsize_for(&ti); + inputs[i].alloc(bufsz); + + std::ifstream file(filename + "." + std::to_string(i), std::ios::ate | std::ios::binary); + auto filesz = file.tellg(); + if (bufsz != filesz) + { + throw std::runtime_error("Input " + std::to_string(i) + + " size does not match: " + std::to_string(bufsz) + + " expected, but " + std::to_string(filesz) + " provided."); + } + file.seekg(0, std::ios::beg); + file.read(reinterpret_cast<char *>(inputs[i].data()), filesz); + file.close(); + + NNPR_ENSURE_STATUS(nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), bufsz)); + NNPR_ENSURE_STATUS(nnfw_set_input_layout(session_, i, NNFW_LAYOUT_CHANNELS_LAST)); + } + } + catch (const std::exception &e) + { + std::cerr << e.what() << std::endl; + std::exit(-1); + } +}; + +void RawFormatter::dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) +{ + uint32_t num_outputs; + NNPR_ENSURE_STATUS(nnfw_output_size(session_, &num_outputs)); + try + { + for (uint32_t i = 0; i < num_outputs; i++) + { + nnfw_tensorinfo ti; + NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session_, i, &ti)); + auto bufsz = bufsize_for(&ti); + + std::ofstream file(filename + "." + std::to_string(i), std::ios::out | std::ios::binary); + file.write(reinterpret_cast<const char *>(outputs[i].data()), bufsz); + file.close(); + std::cerr << filename + "." + std::to_string(i) + " is generated.\n"; + } + } + catch (const std::runtime_error &e) + { + std::cerr << "Error during dumpOutputs on onert_train : " << e.what() << std::endl; + std::exit(-1); + } +} +} // end of namespace onert_train diff --git a/tests/tools/onert_train/src/rawformatter.h b/tests/tools/onert_train/src/rawformatter.h new file mode 100644 index 000000000..90e81b2e9 --- /dev/null +++ b/tests/tools/onert_train/src/rawformatter.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_TRAIN_RAWFORMATTER_H__ +#define __ONERT_TRAIN_RAWFORMATTER_H__ + +#include "allocation.h" +#include "formatter.h" +#include "types.h" + +#include <string> +#include <vector> + +struct nnfw_session; + +namespace onert_train +{ +class RawFormatter : public Formatter +{ +public: + RawFormatter(nnfw_session *sess) : Formatter(sess) {} + void loadInputs(const std::string &filename, std::vector<Allocation> &inputs) override; + void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) override; +}; +} // namespace onert_train + +#endif // __ONERT_TRAIN_RAWFORMATTER_H__ diff --git a/tests/tools/onert_train/src/types.h b/tests/tools/onert_train/src/types.h new file mode 100644 index 000000000..6e2693016 --- /dev/null +++ b/tests/tools/onert_train/src/types.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_TRAIN_TYPES_H__ +#define __ONERT_TRAIN_TYPES_H__ + +namespace onert_train +{ + +using TensorShape = std::vector<int>; + +} // end of namespace onert_train + +#endif // __ONERT_TRAIN_TYPES_H__ diff --git a/tests/tools/onert_train/test/rawdataloader.test.cc b/tests/tools/onert_train/test/rawdataloader.test.cc new file mode 100644 index 000000000..b2930b37e --- /dev/null +++ b/tests/tools/onert_train/test/rawdataloader.test.cc @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2023 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <nnfw.h> + +#include <gtest/gtest.h> +#include <algorithm> +#include <numeric> + +#include "../src/rawdataloader.h" +#include "../src/nnfw_util.h" + +namespace +{ +using namespace onert_train; + +class DataFileGenerator +{ +public: + DataFileGenerator(uint32_t data_length) + : _data_length{data_length}, _input_file{"input.bin"}, _expected_file{"expected.bin"} + { + } + ~DataFileGenerator() + { + try + { + if (std::remove(_input_file.c_str()) != 0) + { + std::cerr << "Failed to remove " << _input_file << std::endl; + } + if (std::remove(_expected_file.c_str()) != 0) + { + std::cerr << "Failed to remove " << _expected_file << std::endl; + } + } + catch (const std::exception &e) + { + std::cerr << "Exception: " << e.what() << std::endl; + } + } + + template <typename T> + const std::string &generateInputData(const std::vector<std::vector<T>> &data) + { + generateData(_input_file, data); + return _input_file; + } + + template <typename T> + const std::string &generateExpectedData(const std::vector<std::vector<T>> &data) + { + generateData(_expected_file, data); + return _expected_file; + } + +private: + template <typename T> + void generateData(const std::string &name, const std::vector<std::vector<T>> &data) + { + try + { + std::ofstream file(name, std::ios::binary); + for (uint32_t i = 0; i < data.size(); ++i) + { + for (uint32_t j = 0; j < _data_length; ++j) + { + for (uint32_t k = 0; k < data[i].size(); ++k) + { + file.write(reinterpret_cast<const char *>(&data[i][k]), sizeof(data[i][k])); + } + } + } + } + catch (const std::exception &e) + { + std::cerr << "Exception: " << e.what() << std::endl; + } + } + +private: + uint32_t _data_length; + std::string _input_file; + std::string _expected_file; +}; + +class RawDataLoaderTest : public testing::Test +{ +protected: + void SetUp() override { nnfw_create_session(&_session); } + + void TearDown() override { nnfw_close_session(_session); } + + nnfw_session *_session = nullptr; +}; + +TEST_F(RawDataLoaderTest, 
loadDatas_1) +{ + const uint32_t data_length = 100; + const uint32_t num_input = 1; + const uint32_t num_expected = 1; + const uint32_t batch_size = 16; + + // Set data tensor info + nnfw_tensorinfo in_info = { + .dtype = NNFW_TYPE_TENSOR_INT32, + .rank = 4, + .dims = {batch_size, 2, 2, 2}, + }; + std::vector<nnfw_tensorinfo> in_infos{in_info}; + + nnfw_tensorinfo expected_info = { + .dtype = NNFW_TYPE_TENSOR_INT32, + .rank = 4, + .dims = {batch_size, 1, 1, 1}, + }; + std::vector<nnfw_tensorinfo> expected_infos{expected_info}; + + // Generate test data + std::vector<std::vector<uint32_t>> in(num_input); + for (uint32_t i = 0; i < num_input; ++i) + { + in[i].resize(num_elems(&in_infos[i]) / batch_size); + std::generate(in[i].begin(), in[i].end(), [this] { + static uint32_t i = 0; + return i++; + }); + } + + std::vector<std::vector<uint32_t>> expected(num_expected); + for (uint32_t i = 0; i < num_expected; ++i) + { + expected[i].resize(num_elems(&expected_infos[i]) / batch_size); + std::generate(expected[i].begin(), expected[i].end(), [in, i] { + auto sum = std::accumulate(in[i].begin(), in[i].end(), 0); + return sum; + }); + } + + // Generate test data file + DataFileGenerator file_gen(data_length); + auto &input_file = file_gen.generateInputData<uint32_t>(in); + auto &expected_file = file_gen.generateExpectedData<uint32_t>(expected); + + // Set expected datas + std::vector<std::vector<uint32_t>> expected_in(num_input); + std::vector<std::vector<uint32_t>> expected_ex(num_expected); + for (uint32_t i = 0; i < num_input; ++i) + { + for (uint32_t j = 0; j < batch_size; ++j) + { + expected_in[i].insert(expected_in[i].end(), in[i].begin(), in[i].end()); + } + } + for (uint32_t i = 0; i < num_expected; ++i) + { + for (uint32_t j = 0; j < batch_size; ++j) + { + expected_ex[i].insert(expected_ex[i].end(), expected[i].begin(), expected[i].end()); + } + } + + // Load test datas + RawDataLoader loader; + Generator generator = + loader.loadData(input_file, expected_file, in_infos, expected_infos, data_length, batch_size); + + // Allocate inputs and expecteds data memory + std::vector<Allocation> inputs(num_input); + for (uint32_t i = 0; i < num_input; ++i) + { + inputs[i].alloc(bufsize_for(&in_infos[i])); + } + std::vector<Allocation> expecteds(num_expected); + for (uint32_t i = 0; i < num_expected; ++i) + { + expecteds[i].alloc(bufsize_for(&expected_infos[i])); + } + + uint32_t num_sample = data_length / batch_size; + for (uint32_t i = 0; i < num_sample; ++i) + { + auto data = generator(i, inputs, expecteds); + + std::vector<std::vector<uint32_t>> gen_in(num_input); + for (uint32_t h = 0; h < num_input; ++h) + { + auto num_elem = num_elems(&in_infos[h]); + for (uint32_t k = 0; k < num_elem; ++k) + { + auto inbufs = reinterpret_cast<uint32_t *>(inputs[h].data()) + k; + gen_in[h].emplace_back(*inbufs); + } + } + std::vector<std::vector<uint32_t>> gen_ex(num_expected); + for (uint32_t h = 0; h < num_expected; ++h) + { + auto num_elem = num_elems(&expected_infos[h]); + for (uint32_t k = 0; k < num_elem; ++k) + { + auto exbufs = reinterpret_cast<uint32_t *>(expecteds[h].data()) + k; + gen_ex[h].emplace_back(*exbufs); + } + } + + EXPECT_EQ(gen_in, expected_in); + EXPECT_EQ(gen_ex, expected_ex); + } +} + +} // namespace diff --git a/tests/tools/tflite_benchmark_model/.FORMATDENY b/tests/tools/tflite_benchmark_model/.FORMATDENY deleted file mode 100644 index e69de29bb..000000000 --- a/tests/tools/tflite_benchmark_model/.FORMATDENY +++ /dev/null diff --git a/tests/tools/tflite_benchmark_model/CMakeLists.txt 
b/tests/tools/tflite_benchmark_model/CMakeLists.txt deleted file mode 100644 index 017e1da57..000000000 --- a/tests/tools/tflite_benchmark_model/CMakeLists.txt +++ /dev/null @@ -1,24 +0,0 @@ -if (NOT BUILD_TFLITE_BENCHMARK_MODEL) - return() -endif(NOT BUILD_TFLITE_BENCHMARK_MODEL) - -nnfw_find_package(TensorFlowLite EXACT 1.13.1 REQUIRED) - -# TODO Remove this target_compile_definitions command, and just check its presence. -# This change is prerequisites on pre-built tensorflow-lite package support -target_compile_definitions(tensorflow-lite PUBLIC "TFLITE_PROFILING_ENABLED") - -file(GLOB_RECURSE SOURCES "*.cc") - -nnas_find_package(TensorFlowSource EXACT 1.13.1 REQUIRED) -set(TENSORFLOW_LITE_BASE "${TensorFlowSource_DIR}/tensorflow/lite") -list(APPEND SOURCES "${TENSORFLOW_LITE_BASE}/tools/benchmark/benchmark_main.cc" - "${TENSORFLOW_LITE_BASE}/tools/benchmark/benchmark_model.cc" - "${TENSORFLOW_LITE_BASE}/tools/benchmark/benchmark_params.cc" - "${TENSORFLOW_LITE_BASE}/tools/benchmark/command_line_flags.cc") - -add_executable(tflite_benchmark_model ${SOURCES}) -target_compile_definitions(tflite_benchmark_model PUBLIC "TFLITE_PROFILING_ENABLED") -target_link_libraries(tflite_benchmark_model nnfw_lib_misc nnfw_lib_tflite nnfw_lib_profiling) -target_link_libraries(tflite_benchmark_model tensorflow-lite ${LIB_PTHREAD} dl) -install(TARGETS tflite_benchmark_model DESTINATION bin) diff --git a/tests/tools/tflite_benchmark_model/README.md b/tests/tools/tflite_benchmark_model/README.md deleted file mode 100644 index a71a2fa1c..000000000 --- a/tests/tools/tflite_benchmark_model/README.md +++ /dev/null @@ -1,197 +0,0 @@ -# TFLite Model Benchmark Tool - -## Description - -A simple C++ binary to benchmark a TFLite model and its individual operators, -both on desktop machines and on Android. The binary takes a TFLite model, -generates random inputs and then repeatedly runs the model for specified number -of runs. Aggregrate latency statistics are reported after running the benchmark. - -The instructions below are for running the binary on Desktop and Android, -for iOS please use the -[iOS benchmark app](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/tools/benchmark/ios). - -## Parameters - -The binary takes the following required parameters: - -* `graph`: `string` \ - The path to the TFLite model file. - -and the following optional parameters: - -* `num_threads`: `int` (default=1) \ - The number of threads to use for running TFLite interpreter. -* `warmup_runs`: `int` (default=1) \ - The number of warmup runs to do before starting the benchmark. -* `num_runs`: `int` (default=50) \ - The number of runs. Increase this to reduce variance. -* `run_delay`: `float` (default=-1.0) \ - The delay in seconds between subsequent benchmark runs. Non-positive values - mean use no delay. -* `use_nnapi`: `bool` (default=false) \ - Whether to use [Android NNAPI](https://developer.android.com/ndk/guides/neuralnetworks/). - This API is available on recent Android devices. - -## To build/install/run - -### On Android: - -(0) Refer to https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android to edit the `WORKSPACE` to configure the android NDK/SDK. - -(1) Build for your specific platform, e.g.: - -``` -bazel build -c opt \ - --config=android_arm \ - --cxxopt='--std=c++11' \ - tensorflow/lite/tools/benchmark:benchmark_model -``` - -(2) Connect your phone. 
Push the binary to your phone with adb push - (make the directory if required): - -``` -adb push bazel-bin/tensorflow/lite/tools/benchmark/benchmark_model /data/local/tmp -``` - -(3) Make the binary executable. - -``` -adb shell chmod +x /data/local/tmp/benchmark_model -``` - -(4) Push the compute graph that you need to test. For example: - -``` -adb push mobilenet_quant_v1_224.tflite /data/local/tmp -``` - -(5) Run the benchmark. For example: - -``` -adb shell /data/local/tmp/benchmark_model \ - --graph=/data/local/tmp/mobilenet_quant_v1_224.tflite \ - --num_threads=4 -``` - -### On desktop: -(1) build the binary - -``` -bazel build -c opt tensorflow/lite/tools/benchmark:benchmark_model -``` - -(2) Run on your compute graph, similar to the Android case but without the need of adb shell. -For example: - -``` -bazel-bin/tensorflow/lite/tools/benchmark/benchmark_model \ - --graph=mobilenet_quant_v1_224.tflite \ - --num_threads=4 -``` - -The MobileNet graph used as an example here may be downloaded from [here](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip). - - -## Reducing variance between runs on Android. - -Most modern Android phones use [ARM big.LITTLE](https://en.wikipedia.org/wiki/ARM_big.LITTLE) -architecture where some cores are more power hungry but faster than other cores. -When running benchmarks on these phones there can be significant variance -between different runs of the benchmark. One way to reduce variance between runs -is to set the [CPU affinity](https://en.wikipedia.org/wiki/Processor_affinity) -before running the benchmark. On Android this can be done using the `taskset` -command. -E.g. for running the benchmark on big cores on Pixel 2 with a single thread one -can use the following command: - -``` -adb shell taskset f0 /data/local/tmp/benchmark_model \ - --graph=/data/local/tmp/mobilenet_quant_v1_224.tflite \ - --num_threads=1 -``` - -where `f0` is the affinity mask for big cores on Pixel 2. -Note: The affinity mask varies with the device. - -## Profiling model operators -The benchmark model binary also allows you to profile operators and give execution times of each operator. To do this, -compile the binary with a compiler flag that enables profiling to be compiled in. Pass **--copt=-DTFLITE_PROFILING_ENABLED** -to compile benchmark with profiling support. -For example, to compile with profiling support on Android, add this flag to the previous command: - -``` -bazel build -c opt \ - --config=android_arm \ - --cxxopt='--std=c++11' \ - --copt=-DTFLITE_PROFILING_ENABLED \ - tensorflow/lite/tools/benchmark:benchmark_model -``` -This compiles TFLite with profiling enabled, now you can run the benchmark binary like before. 
The binary will produce detailed statistics for each operation similar to those shown below: - -``` - -============================== Run Order ============================== - [node type] [start] [first] [avg ms] [%] [cdf%] [mem KB] [times called] [Name] - CONV_2D 0.000 4.269 4.269 0.107% 0.107% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_0/Relu6] - DEPTHWISE_CONV_2D 4.270 2.150 2.150 0.054% 0.161% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_1_depthwise/Relu6] - CONV_2D 6.421 6.107 6.107 0.153% 0.314% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_1_pointwise/Relu6] - DEPTHWISE_CONV_2D 12.528 1.366 1.366 0.034% 0.348% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_2_depthwise/Relu6] - CONV_2D 13.895 4.195 4.195 0.105% 0.454% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_2_pointwise/Relu6] - DEPTHWISE_CONV_2D 18.091 1.260 1.260 0.032% 0.485% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_3_depthwise/Relu6] - CONV_2D 19.352 6.652 6.652 0.167% 0.652% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_3_pointwise/Relu6] - DEPTHWISE_CONV_2D 26.005 0.698 0.698 0.018% 0.670% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_4_depthwise/Relu6] - CONV_2D 26.703 3.344 3.344 0.084% 0.754% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_4_pointwise/Relu6] - DEPTHWISE_CONV_2D 30.047 0.646 0.646 0.016% 0.770% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_5_depthwise/Relu6] - CONV_2D 30.694 5.800 5.800 0.145% 0.915% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_5_pointwise/Relu6] - DEPTHWISE_CONV_2D 36.495 0.331 0.331 0.008% 0.924% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_6_depthwise/Relu6] - CONV_2D 36.826 2.838 2.838 0.071% 0.995% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_6_pointwise/Relu6] - DEPTHWISE_CONV_2D 39.665 0.439 0.439 0.011% 1.006% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_7_depthwise/Relu6] - CONV_2D 40.105 5.293 5.293 0.133% 1.139% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_7_pointwise/Relu6] - DEPTHWISE_CONV_2D 45.399 0.352 0.352 0.009% 1.147% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_8_depthwise/Relu6] - CONV_2D 45.752 5.322 5.322 0.133% 1.281% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_8_pointwise/Relu6] - DEPTHWISE_CONV_2D 51.075 0.357 0.357 0.009% 1.290% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_9_depthwise/Relu6] - CONV_2D 51.432 5.693 5.693 0.143% 1.433% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_9_pointwise/Relu6] - DEPTHWISE_CONV_2D 57.126 0.366 0.366 0.009% 1.442% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_10_depthwise/Relu6] - CONV_2D 57.493 5.472 5.472 0.137% 1.579% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_10_pointwise/Relu6] - DEPTHWISE_CONV_2D 62.966 0.364 0.364 0.009% 1.588% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_11_depthwise/Relu6] - CONV_2D 63.330 5.404 5.404 0.136% 1.724% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_11_pointwise/Relu6] - DEPTHWISE_CONV_2D 68.735 0.155 0.155 0.004% 1.728% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_12_depthwise/Relu6] - CONV_2D 68.891 2.970 2.970 0.074% 1.802% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_12_pointwise/Relu6] - DEPTHWISE_CONV_2D 71.862 0.206 0.206 0.005% 1.807% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_13_depthwise/Relu6] - CONV_2D 72.069 5.888 5.888 0.148% 1.955% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_13_pointwise/Relu6] - AVERAGE_POOL_2D 77.958 0.036 0.036 0.001% 1.956% 0.000 0 [MobilenetV1/Logits/AvgPool_1a/AvgPool] - CONV_2D 77.994 1.445 1.445 0.036% 1.992% 0.000 0 [MobilenetV1/Logits/Conv2d_1c_1x1/BiasAdd] - RESHAPE 79.440 0.002 0.002 0.000% 1.992% 0.000 0 [MobilenetV1/Predictions/Reshape] - SOFTMAX 79.443 0.029 0.029 0.001% 1.993% 0.000 0 [MobilenetV1/Predictions/Softmax] - -============================== Top by Computation 
Time ============================== - [node type] [start] [first] [avg ms] [%] [cdf%] [mem KB] [times called] [Name] - CONV_2D 19.352 6.652 6.652 0.167% 0.167% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_3_pointwise/Relu6] - CONV_2D 6.421 6.107 6.107 0.153% 0.320% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_1_pointwise/Relu6] - CONV_2D 72.069 5.888 5.888 0.148% 0.468% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_13_pointwise/Relu6] - CONV_2D 30.694 5.800 5.800 0.145% 0.613% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_5_pointwise/Relu6] - CONV_2D 51.432 5.693 5.693 0.143% 0.756% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_9_pointwise/Relu6] - CONV_2D 57.493 5.472 5.472 0.137% 0.893% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_10_pointwise/Relu6] - CONV_2D 63.330 5.404 5.404 0.136% 1.029% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_11_pointwise/Relu6] - CONV_2D 45.752 5.322 5.322 0.133% 1.162% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_8_pointwise/Relu6] - CONV_2D 40.105 5.293 5.293 0.133% 1.295% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_7_pointwise/Relu6] - CONV_2D 0.000 4.269 4.269 0.107% 1.402% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_0/Relu6] - -Number of nodes executed: 31 -============================== Summary by node type ============================== - [Node type] [count] [avg ms] [avg %] [cdf %] [mem KB] [times called] - CONV_2D 15 1.406 89.270% 89.270% 0.000 0 - DEPTHWISE_CONV_2D 13 0.169 10.730% 100.000% 0.000 0 - SOFTMAX 1 0.000 0.000% 100.000% 0.000 0 - RESHAPE 1 0.000 0.000% 100.000% 0.000 0 - AVERAGE_POOL_2D 1 0.000 0.000% 100.000% 0.000 0 - -Timings (microseconds): count=50 first=79449 curr=81350 min=77385 max=88213 avg=79732 std=1929 -Memory (bytes): count=0 -31 nodes observed - - -Average inference timings in us: Warmup: 83235, Init: 38467, no stats: 79760.9 -``` diff --git a/tests/tools/tflite_benchmark_model/benchmark_tflite_model.cc b/tests/tools/tflite_benchmark_model/benchmark_tflite_model.cc deleted file mode 100644 index 16e85fc07..000000000 --- a/tests/tools/tflite_benchmark_model/benchmark_tflite_model.cc +++ /dev/null @@ -1,419 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/lite/tools/benchmark/benchmark_tflite_model.h" - -#include <cstdarg> -#include <cstdlib> -#include <iostream> -#include <memory> -#include <string> -#include <unordered_set> -#include <vector> - -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/op_resolver.h" -#include "tensorflow/lite/string_util.h" -#include "tensorflow/lite/tools/benchmark/logging.h" - -#ifdef GEMMLOWP_PROFILING -#include "gemmlowp/profiling/profiler.h" -#endif - -// For profiling nnapi_delegate -#include "profiling/profiling.h" -#include "tflite/ext/nnapi_delegate.h" - -namespace { - nnfw::tflite::NNAPIDelegate nnfw_delegate_; -} - -#ifdef TFLITE_CUSTOM_OPS_HEADER -void RegisterSelectedOps(::tflite::MutableOpResolver* resolver); -#endif - -namespace tflite { -namespace benchmark { - -void ProfilingListener::SetInterpreter(tflite::Interpreter* interpreter) { - TFLITE_BENCHMARK_CHECK(interpreter); - interpreter_ = interpreter; - interpreter_->SetProfiler(&profiler_); -} - -void ProfilingListener::OnSingleRunStart(RunType run_type) { - if (run_type == REGULAR) { - profiler_.Reset(); - profiler_.StartProfiling(); - } -} - -void ProfilingListener::OnBenchmarkEnd(const BenchmarkResults& results) { - if (has_profiles_) { - TFLITE_LOG(INFO) << summarizer_.GetOutputString(); - } -} - -void ProfilingListener::OnSingleRunEnd() { - profiler_.StopProfiling(); - auto profile_events = profiler_.GetProfileEvents(); - has_profiles_ = !profile_events.empty(); - summarizer_.ProcessProfiles(profile_events, *interpreter_); -} - -void GemmlowpProfilingListener::OnBenchmarkStart( - const BenchmarkParams& params) { -#ifdef GEMMLOWP_PROFILING - gemmlowp::RegisterCurrentThreadForProfiling(); - gemmlowp::StartProfiling(); -#endif -} - -void GemmlowpProfilingListener::OnBenchmarkEnd( - const BenchmarkResults& results) { -#ifdef GEMMLOWP_PROFILING - gemmlowp::FinishProfiling(); -#endif -} - -namespace { - -std::vector<std::string> Split(const std::string& str, const char delim) { - std::istringstream input(str); - std::vector<std::string> results; - std::string item; - while (std::getline(input, item, delim)) { - results.push_back(item); - } - return results; -} - -template <typename T> -bool SplitAndParse(const std::string& str, char delim, std::vector<T>* values) { - std::istringstream input(str); - bool first = true; - while (!input.eof()) { - if (!first) { - char c; - input >> c; - if (c != delim) { - return false; - } - } else { - first = false; - } - T val; - input >> val; - if (!input.eof() && !input.good()) { - return false; - } - values->push_back(val); - } - return true; -} - -template <typename T> -void FillRandomValue(T* ptr, const std::vector<int>& sizes, - const std::function<T()>& random_func) { - int num_elements = 1; - for (int dim : sizes) { - num_elements *= dim; - } - for (int i = 0; i < num_elements; ++i) { - *ptr++ = random_func(); - } -} - -void FillRandomString(tflite::DynamicBuffer* buffer, - const std::vector<int>& sizes, - const std::function<string()>& random_func) { - int num_elements = 1; - for (int dim : sizes) { - num_elements *= dim; - } - for (int i = 0; i < num_elements; ++i) { - auto str = random_func(); - buffer->AddString(str.data(), str.length()); - } -} - -bool PopulateInputLayerInfo( - const string& names_string, const string& shapes_string, - std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info) { - std::vector<std::string> names = 
Split(names_string, ','); - std::vector<std::string> shapes = Split(shapes_string, ':'); - - if (names.size() != shapes.size()) { - TFLITE_LOG(ERROR) << "The number of items in" - << " --input_layer_shape (" << shapes_string << ", with " - << shapes.size() << " items)" - << " must match the number of items in" - << " --input_layer (" << names_string << ", with " - << names.size() << " items)." - << " For example --input_layer=input1,input2" - << " --input_layer_shape=1,224,224,4:1,20"; - return false; - } - - for (int i = 0; i < names.size(); ++i) { - info->push_back(BenchmarkTfLiteModel::InputLayerInfo()); - BenchmarkTfLiteModel::InputLayerInfo& input = info->back(); - - input.name = names[i]; - - TFLITE_BENCHMARK_CHECK(SplitAndParse(shapes[i], ',', &input.shape)) - << "Incorrect size string specified: " << shapes[i]; - for (int dim : input.shape) { - if (dim == -1) { - TFLITE_LOG(ERROR) - << "Any unknown sizes in the shapes (-1's) must be replaced" - << " with the size you want to benchmark with."; - return false; - } - } - } - - return true; -} - -std::vector<int> TfLiteIntArrayToVector(const TfLiteIntArray* int_array) { - std::vector<int> values; - values.reserve(int_array->size); - for (size_t i = 0; i < int_array->size; i++) { - values.push_back(int_array->data[i]); - } - return values; -} - -} // namespace - -BenchmarkParams BenchmarkTfLiteModel::DefaultParams() { - BenchmarkParams default_params = BenchmarkModel::DefaultParams(); - default_params.AddParam("graph", BenchmarkParam::Create<std::string>("")); - default_params.AddParam("input_layer", - BenchmarkParam::Create<std::string>("")); - default_params.AddParam("input_layer_shape", - BenchmarkParam::Create<std::string>("")); - default_params.AddParam("use_nnapi", BenchmarkParam::Create<bool>(false)); - return default_params; -} - -BenchmarkTfLiteModel::BenchmarkTfLiteModel() - : BenchmarkTfLiteModel(DefaultParams()) {} - -BenchmarkTfLiteModel::BenchmarkTfLiteModel(BenchmarkParams params) - : BenchmarkModel(std::move(params)) { - AddListener(&profiling_listener_); - AddListener(&gemmlowp_profiling_listener_); -} - -std::vector<Flag> BenchmarkTfLiteModel::GetFlags() { - std::vector<Flag> flags = BenchmarkTfLiteModel::BenchmarkModel::GetFlags(); - std::vector<Flag> specific_flags = { - CreateFlag<std::string>("graph", ¶ms_, "graph file name"), - CreateFlag<std::string>("input_layer", ¶ms_, "input layer names"), - CreateFlag<std::string>("input_layer_shape", ¶ms_, - "input layer shape"), - CreateFlag<bool>("use_nnapi", ¶ms_, "use nnapi api")}; - - flags.insert(flags.end(), specific_flags.begin(), specific_flags.end()); - return flags; -} - -void BenchmarkTfLiteModel::LogParams() { - BenchmarkModel::LogParams(); - TFLITE_LOG(INFO) << "Graph: [" << params_.Get<std::string>("graph") << "]"; - TFLITE_LOG(INFO) << "Input layers: [" - << params_.Get<std::string>("input_layer") << "]"; - TFLITE_LOG(INFO) << "Input shapes: [" - << params_.Get<std::string>("input_layer_shape") << "]"; - TFLITE_LOG(INFO) << "Use nnapi : [" << params_.Get<bool>("use_nnapi") << "]"; -} - -bool BenchmarkTfLiteModel::ValidateParams() { - if (params_.Get<std::string>("graph").empty()) { - TFLITE_LOG(ERROR) - << "Please specify the name of your TF Lite input file with --graph"; - return false; - } - return PopulateInputLayerInfo(params_.Get<std::string>("input_layer"), - params_.Get<std::string>("input_layer_shape"), - &inputs); -} - -uint64_t BenchmarkTfLiteModel::ComputeInputBytes() { - TFLITE_BENCHMARK_CHECK(interpreter); - uint64_t total_input_bytes = 0; - for 
(int input : interpreter->inputs()) { - auto* t = interpreter->tensor(input); - total_input_bytes += t->bytes; - } - return total_input_bytes; -} - -void BenchmarkTfLiteModel::PrepareInputsAndOutputs() { - auto interpreter_inputs = interpreter->inputs(); - // Set the values of the input tensors. - for (int j = 0; j < interpreter_inputs.size(); ++j) { - int i = interpreter_inputs[j]; - TfLiteTensor* t = interpreter->tensor(i); - std::vector<int> sizes = TfLiteIntArrayToVector(t->dims); - // TODO(ahentz): below we ignore the O-th dimension (number of batches). - if (t->type == kTfLiteFloat32) { - FillRandomValue<float>( - interpreter->typed_tensor<float>(i), - std::vector<int>(sizes.begin() + 1, sizes.end()), - []() { return static_cast<float>(rand()) / RAND_MAX - 0.5f; }); - } else if (t->type == kTfLiteInt32) { - // TODO(yunluli): This is currently only used for handling embedding input - // for speech models. Generalize if necessary. - FillRandomValue<int32_t>( - interpreter->typed_tensor<int32_t>(i), - std::vector<int32_t>(sizes.begin() + 1, sizes.end()), - []() { return static_cast<int32_t>(rand()) % 100; }); - } else if (t->type == kTfLiteUInt8) { - FillRandomValue<uint8_t>( - interpreter->typed_tensor<uint8_t>(i), - std::vector<int>(sizes.begin() + 1, sizes.end()), - []() { return static_cast<uint8_t>(rand()) % 255; }); - } else if (t->type == kTfLiteInt8) { - FillRandomValue<int8_t>( - interpreter->typed_tensor<int8_t>(i), - std::vector<int>(sizes.begin() + 1, sizes.end()), - []() { return static_cast<int8_t>(rand()) % 255 - 127; }); - } else if (t->type == kTfLiteString) { - tflite::DynamicBuffer buffer; - FillRandomString(&buffer, sizes, []() { - return "we're have some friends over saturday to hang out in the yard"; - }); - buffer.WriteToTensor(interpreter->tensor(i), /*new_shape=*/nullptr); - } else { - TFLITE_LOG(FATAL) << "Don't know how to populate tensor " << t->name - << " of type " << t->type; - } - } -} - -void BenchmarkTfLiteModel::Init() { - std::string graph = params_.Get<std::string>("graph"); - model = tflite::FlatBufferModel::BuildFromFile(graph.c_str()); - if (!model) { - TFLITE_LOG(FATAL) << "Failed to mmap model " << graph; - } - TFLITE_LOG(INFO) << "Loaded model " << graph; - model->error_reporter(); - TFLITE_LOG(INFO) << "resolved reporter"; - -#ifdef TFLITE_CUSTOM_OPS_HEADER - tflite::MutableOpResolver resolver; - RegisterSelectedOps(&resolver); -#else - nnfw::tflite::BuiltinOpResolver resolver; -#endif - - tflite::InterpreterBuilder(*model, resolver)(&interpreter); - if (!interpreter) { - TFLITE_LOG(FATAL) << "Failed to construct interpreter"; - } - profiling_listener_.SetInterpreter(interpreter.get()); - ::profiling::Context::get().setProfiler(interpreter->GetProfiler()); - - auto enable_sync = std::getenv("PROFILING_OP_SYNC"); - if (enable_sync && std::strtol(enable_sync, NULL, 0) != 0) - { - ::profiling::Context::get().setSync(); - } - - const int32_t num_threads = params_.Get<int32_t>("num_threads"); - - if (num_threads != -1) { - interpreter->SetNumThreads(num_threads); - } - - bool use_nnapi = params_.Get<bool>("use_nnapi"); - - interpreter->UseNNAPI(use_nnapi); - if (use_nnapi) { - if (nnfw_delegate_.BuildGraph(&(interpreter.get()->primary_subgraph())) != kTfLiteOk) { - TFLITE_LOG(FATAL) << "Failed to BuildGraph!"; - } - } - ApplyDelegates(); - - auto interpreter_inputs = interpreter->inputs(); - - if (!inputs.empty()) { - TFLITE_BENCHMARK_CHECK_EQ(inputs.size(), interpreter_inputs.size()) - << "Inputs mismatch: Model inputs #:" << 
interpreter_inputs.size() - << " expected: " << inputs.size(); - } - - // TFLITE_BENCHMARK_CHECK that all names and types match - for (int j = 0; j < inputs.size(); ++j) { - const InputLayerInfo& input = inputs[j]; - int i = interpreter_inputs[j]; - TfLiteTensor* t = interpreter->tensor(i); - TFLITE_BENCHMARK_CHECK_EQ(t->name, input.name) - << "Tensor # " << i << " is named " << t->name << " but flags call it " - << input.name; - } - - // Resize all non-string tensors. - for (int j = 0; j < inputs.size(); ++j) { - const InputLayerInfo& input = inputs[j]; - int i = interpreter_inputs[j]; - TfLiteTensor* t = interpreter->tensor(i); - if (t->type != kTfLiteString) { - interpreter->ResizeInputTensor(i, input.shape); - } - } - - if (interpreter->AllocateTensors() != kTfLiteOk) { - TFLITE_LOG(FATAL) << "Failed to allocate tensors!"; - } -} - -void BenchmarkTfLiteModel::RunImpl() { - bool use_nnapi = params_.Get<bool>("use_nnapi"); - if (use_nnapi) { - if (nnfw_delegate_.Invoke(&interpreter->primary_subgraph()) != kTfLiteOk) { - TFLITE_LOG(FATAL) << "Failed to invoke!"; - } - } else { - if (interpreter->Invoke() != kTfLiteOk) { - TFLITE_LOG(FATAL) << "Failed to invoke!"; - } - } -} - -} // namespace benchmark -} // namespace tflite diff --git a/tests/tools/tflite_benchmark_model/profile_summarizer.cc b/tests/tools/tflite_benchmark_model/profile_summarizer.cc deleted file mode 100644 index b547c7095..000000000 --- a/tests/tools/tflite_benchmark_model/profile_summarizer.cc +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/lite/profiling/profile_summarizer.h" - -#include <sstream> - -#include "tensorflow/lite/schema/schema_generated.h" - -namespace tflite { -namespace profiling { -namespace { - -struct OperatorDetails { - std::string name; - std::vector<std::string> inputs; - std::vector<std::string> outputs; -}; - -std::string GetTensorName(const tflite::Interpreter& interpreter, - int tensor_index) { - const auto tensor = interpreter.tensor(tensor_index); - if (tensor == nullptr || tensor->name == nullptr) { - return "Unknown"; - } - return tensor->name; -} -std::vector<std::string> GetTensorNames(const tflite::Interpreter& interpreter, - const TfLiteIntArray* tensor_indices) { - std::vector<std::string> tensors; - tensors.reserve(tensor_indices->size); - for (int i = 0; i < tensor_indices->size; i++) { - tensors.push_back(GetTensorName(interpreter, tensor_indices->data[i])); - } - return tensors; -} - -std::string ToString(const std::vector<std::string>& str_vector) { - std::stringstream stream; - stream << "["; - bool first = true; - for (const auto& s : str_vector) { - if (!first) { - stream << ", "; - } else { - first = false; - } - stream << s; - } - stream << "]"; - return stream.str(); -} - -OperatorDetails GetOperatorDetails(const tflite::Interpreter& interpreter, - int node_index) { - auto node_reg = interpreter.node_and_registration(node_index); - auto inputs = node_reg->first.inputs; - auto outputs = node_reg->first.outputs; - int code = node_reg->second.builtin_code; - const char* op_name = nullptr; - if (code == tflite::BuiltinOperator_CUSTOM) { - const char* custom_name = node_reg->second.custom_name; - op_name = custom_name ? custom_name : "UnknownCustomOp"; - } else { - op_name = tflite::EnumNamesBuiltinOperator()[code]; - } - const char* profiling_string = - interpreter.OpProfilingString(node_reg->second, &node_reg->first); - OperatorDetails details; - details.name = op_name; - if (profiling_string) { - details.name += ":" + std::string(profiling_string); - } - details.inputs = GetTensorNames(interpreter, inputs); - details.outputs = GetTensorNames(interpreter, outputs); - return details; -} - -tensorflow::StatSummarizerOptions GetProfileSummarizerOptions() { - auto options = tensorflow::StatSummarizerOptions(); - options.show_summary = true; - options.show_memory = false; - return options; -} - -} // namespace - -ProfileSummarizer::ProfileSummarizer() - : stats_calculator_( - new ::tensorflow::StatsCalculator(GetProfileSummarizerOptions())) {} - -void ProfileSummarizer::ProcessProfiles( - const std::vector<const ProfileEvent*>& profile_stats, - const tflite::Interpreter& interpreter) { - std::vector<const ProfileEvent*> events; - std::copy_if(profile_stats.begin(), profile_stats.end(), - std::back_inserter(events), [](const ProfileEvent* e) { - return e->event_type == - ProfileEvent::EventType::OPERATOR_INVOKE_EVENT && - e->end_timestamp_us >= e->begin_timestamp_us; - }); - // Sort with begin_time. 
- std::sort(events.begin(), events.end(), - [](const ProfileEvent* const& a, const ProfileEvent* const& b) { - return a->begin_timestamp_us < b->begin_timestamp_us; - }); - if (events.empty()) { - return; - } - - int64_t base_start_us = events[0]->begin_timestamp_us; - int node_num = 0; - int64_t curr_total_us = 0; - int prev_op_idx = -1; - int child_op_no = 1; - for (auto event : events) { - auto op_details = GetOperatorDetails(interpreter, event->event_metadata); - bool from_same_op = (prev_op_idx == event->event_metadata); - child_op_no = from_same_op ? child_op_no + 1 : 1; - auto node_name = ToString(op_details.outputs) + "#" + std::to_string(child_op_no); - int64_t start_us = event->begin_timestamp_us - base_start_us; - int64_t node_exec_time = - event->end_timestamp_us - event->begin_timestamp_us; - stats_calculator_->AddNodeStats(node_name, op_details.name, node_num, - start_us, node_exec_time, 0 /*memory */); - curr_total_us += node_exec_time; - ++node_num; - prev_op_idx = event->event_metadata; - } - stats_calculator_->UpdateRunTotalUs(curr_total_us); -} -} // namespace profiling -} // namespace tflite diff --git a/tests/tools/tflite_benchmark_model/stats_calculator.cc b/tests/tools/tflite_benchmark_model/stats_calculator.cc deleted file mode 100644 index 578650701..000000000 --- a/tests/tools/tflite_benchmark_model/stats_calculator.cc +++ /dev/null @@ -1,317 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/core/util/stats_calculator.h" - -#include <iomanip> -#include <map> -#include <queue> -#include <sstream> -#include <string> -#include <algorithm> - -namespace tensorflow { - -StatsCalculator::StatsCalculator(const StatSummarizerOptions& options) - : options_(options) {} - -std::string StatsCalculator::GetShortSummary() const { - std::stringstream stream; - stream << "Timings (microseconds): "; - run_total_us_.OutputToStream(&stream); - stream << std::endl; - - stream << "Memory (bytes): "; - memory_.OutputToStream(&stream); - stream << std::endl; - - stream << details_.size() << " nodes observed" << std::endl; - return stream.str(); -} - -std::ostream& InitField(std::ostream& stream, int width) { - stream << "\t" << std::right << std::setw(width) << std::fixed - << std::setprecision(3); - return stream; -} - -std::string StatsCalculator::HeaderString(const std::string& title) const { - std::stringstream stream; - - stream << "============================== " << title - << " ==============================" << std::endl; - - InitField(stream, 24) << "[node type]"; - InitField(stream, 9) << "[start]"; - InitField(stream, 9) << "[first]"; - InitField(stream, 9) << "[avg ms]"; - InitField(stream, 8) << "[%]"; - InitField(stream, 8) << "[cdf%]"; - InitField(stream, 10) << "[mem KB]"; - InitField(stream, 9) << "[times called]"; - stream << "\t" - << "[Name]"; - return stream.str(); -} - -std::string StatsCalculator::ColumnString(const Detail& detail, - const int64_t cumulative_stat_on_node, - const Stat<int64_t>& stat) const { - const double start_ms = detail.start_us.avg() / 1000.0; - const double first_time_ms = detail.rel_end_us.first() / 1000.0; - const double avg_time_ms = detail.rel_end_us.avg() / 1000.0; - const double percentage = detail.rel_end_us.sum() * 100.0 / stat.sum(); - const double cdf_percentage = (cumulative_stat_on_node * 100.0f) / stat.sum(); - const int64_t times_called = detail.times_called / num_runs(); - - std::stringstream stream; - InitField(stream, 24) << detail.type; - InitField(stream, 9) << start_ms; - InitField(stream, 9) << first_time_ms; - InitField(stream, 9) << avg_time_ms; - InitField(stream, 7) << percentage << "%"; - InitField(stream, 7) << cdf_percentage << "%"; - InitField(stream, 10) << detail.mem_used.newest() / 1000.0; - InitField(stream, 9) << times_called; - stream << "\t" << detail.name; - - return stream.str(); -} - -void StatsCalculator::OrderNodesByMetric( - SortingMetric metric, std::vector<const Detail*>* details) const { - std::priority_queue<std::pair<std::string, const Detail*>> sorted_list; - const int num_nodes = details_.size(); - - for (const auto& det : details_) { - const Detail* detail = &(det.second); - std::stringstream stream; - stream << std::setw(20) << std::right << std::setprecision(10) - << std::fixed; - - switch (metric) { - case BY_NAME: - stream << detail->name; - break; - case BY_RUN_ORDER: - stream << num_nodes - detail->run_order; - break; - case BY_TIME: - stream << detail->rel_end_us.avg(); - break; - case BY_MEMORY: - stream << detail->mem_used.avg(); - break; - case BY_TYPE: - stream << detail->type; - break; - default: - stream << ""; - break; - } - - sorted_list.emplace(stream.str(), detail); - } - - while (!sorted_list.empty()) { - auto entry = sorted_list.top(); - sorted_list.pop(); - details->push_back(entry.second); - } -} - -void StatsCalculator::ComputeStatsByType( - std::map<std::string, int64_t>* 
node_type_map_count, - std::map<std::string, int64_t>* node_type_map_time, - std::map<std::string, int64_t>* node_type_map_memory, - std::map<std::string, int64_t>* node_type_map_times_called, - int64_t* accumulated_us) const { - int64_t run_count = run_total_us_.count(); - - for (const auto& det : details_) { - const std::string node_name = det.first; - const Detail& detail = det.second; - - int64_t curr_time_val = - static_cast<int64_t>(detail.rel_end_us.sum() / run_count); - *accumulated_us += curr_time_val; - - int64_t curr_memory_val = detail.mem_used.newest(); - - const std::string& node_type = detail.type; - - const std::string sharp1("#1"); - bool first = std::mismatch(sharp1.rbegin(), sharp1.rend(), node_name.rbegin()).first == sharp1.rend(); - - if (first) { - (*node_type_map_count)[node_type] += 1; - (*node_type_map_times_called)[node_type] += detail.times_called / run_count; - } - (*node_type_map_time)[node_type] += curr_time_val; - (*node_type_map_memory)[node_type] += curr_memory_val; - } -} - -std::string StatsCalculator::GetStatsByNodeType() const { - std::stringstream stream; - - stream << "Number of nodes executed: " << details_.size() << std::endl; - - stream << "============================== Summary by node type " - "==============================" - << std::endl; - - std::map<std::string, int64_t> node_type_map_count; - std::map<std::string, int64_t> node_type_map_time; - std::map<std::string, int64_t> node_type_map_memory; - std::map<std::string, int64_t> node_type_map_times_called; - int64_t accumulated_us = 0; - - ComputeStatsByType(&node_type_map_count, &node_type_map_time, - &node_type_map_memory, &node_type_map_times_called, - &accumulated_us); - - // Sort them. - std::priority_queue<std::pair<int64_t, std::pair<std::string, int64_t>>> - timings; - for (const auto& node_type : node_type_map_time) { - const int64_t mem_used = node_type_map_memory[node_type.first]; - timings.emplace(node_type.second, - std::pair<std::string, int64_t>(node_type.first, mem_used)); - } - - InitField(stream, 24) << "[Node type]"; - InitField(stream, 9) << "[count]"; - InitField(stream, 10) << "[avg ms]"; - InitField(stream, 11) << "[avg %]"; - InitField(stream, 11) << "[cdf %]"; - InitField(stream, 10) << "[mem KB]"; - InitField(stream, 10) << "[times called]"; - stream << std::endl; - - float cdf = 0.0f; - while (!timings.empty()) { - auto entry = timings.top(); - timings.pop(); - - const std::string node_type = entry.second.first; - const float memory = entry.second.second / 1000.0f; - - const int64_t node_type_total_us = entry.first; - const float time_per_run_ms = node_type_total_us / 1000.0f; - - const float percentage = - ((entry.first / static_cast<float>(accumulated_us)) * 100.0f); - cdf += percentage; - - InitField(stream, 24) << node_type; - InitField(stream, 9) << node_type_map_count[node_type]; - InitField(stream, 10) << time_per_run_ms; - InitField(stream, 10) << percentage << "%"; - InitField(stream, 10) << cdf << "%"; - InitField(stream, 10) << memory; - InitField(stream, 9) << node_type_map_times_called[node_type]; - stream << std::endl; - } - stream << std::endl; - return stream.str(); -} - -std::string StatsCalculator::GetStatsByMetric(const std::string& title, - SortingMetric sorting_metric, - int num_stats) const { - std::vector<const Detail*> details; - OrderNodesByMetric(sorting_metric, &details); - - double cumulative_stat_on_node = 0; - - std::stringstream stream; - stream << HeaderString(title) << std::endl; - int stat_num = 0; - for (auto detail : details) { 
- ++stat_num; - if (num_stats > 0 && stat_num > num_stats) { - break; - } - - // TODO(andrewharp): Make this keep track of the particular metric for cdf. - cumulative_stat_on_node += detail->rel_end_us.sum(); - stream << ColumnString(*detail, cumulative_stat_on_node, run_total_us_) - << std::endl; - } - stream << std::endl; - return stream.str(); -} - -std::string StatsCalculator::GetOutputString() const { - std::stringstream stream; - if (options_.show_run_order) { - stream << GetStatsByMetric("Run Order", BY_RUN_ORDER, - options_.run_order_limit); - } - if (options_.show_time) { - stream << GetStatsByMetric("Top by Computation Time", BY_TIME, - options_.time_limit); - } - if (options_.show_memory) { - stream << GetStatsByMetric("Top by Memory Use", BY_MEMORY, - options_.memory_limit); - } - if (options_.show_type) { - stream << GetStatsByNodeType(); - } - if (options_.show_summary) { - stream << GetShortSummary() << std::endl; - } - return stream.str(); -} - -void StatsCalculator::AddNodeStats(const std::string& name, - const std::string& type, int64_t run_order, - int64_t start_us, int64_t rel_end_us, - int64_t mem_used) { - Detail* detail = nullptr; - if (details_.find(name) == details_.end()) { - details_.insert({name, {}}); - detail = &details_.at(name); - detail->type = type; - detail->name = name; - detail->run_order = run_order; - } else { - detail = &details_.at(name); - } - detail->start_us.UpdateStat(start_us); - detail->rel_end_us.UpdateStat(rel_end_us); - detail->mem_used.UpdateStat(mem_used); - detail->times_called++; -} - -} // namespace tensorflow diff --git a/tests/tools/tflite_comparator/CMakeLists.txt b/tests/tools/tflite_comparator/CMakeLists.txt new file mode 100644 index 000000000..54e3f61fd --- /dev/null +++ b/tests/tools/tflite_comparator/CMakeLists.txt @@ -0,0 +1,23 @@ +if(NOT BUILD_TFLITE_COMPARATOR_TEST_TOOL) + message("skipping tflite comparator tool build") + return() +endif(NOT BUILD_TFLITE_COMPARATOR_TEST_TOOL) + +if(NOT BUILD_ONERT) + message("skipping tflite comparator tool build: onert is not built") + return() +endif(NOT BUILD_ONERT) + +list(APPEND SOURCES "src/tflite_comparator.cc") +list(APPEND SOURCES "src/args.cc") + +nnfw_find_package(Boost REQUIRED program_options system filesystem) + +add_executable(tflite_comparator ${SOURCES}) +target_include_directories(tflite_comparator PRIVATE ${Boost_INCLUDE_DIRS}) + +target_link_libraries(tflite_comparator nnfw-dev) +target_link_libraries(tflite_comparator nnfw_lib_tflite nnfw_lib_misc) +target_link_libraries(tflite_comparator ${Boost_PROGRAM_OPTIONS_LIBRARY} ${Boost_SYSTEM_LIBRARY} ${Boost_FILESYSTEM_LIBRARY}) + +install(TARGETS tflite_comparator DESTINATION bin) diff --git a/tests/tools/tflite_loader/src/args.cc b/tests/tools/tflite_comparator/src/args.cc index e9fb141ca..ecab20b17 100644 --- a/tests/tools/tflite_loader/src/args.cc +++ b/tests/tools/tflite_comparator/src/args.cc @@ -47,8 +47,8 @@ void Args::Initialize(void) void Args::print(char **argv) { - std::cout << "tflite_loader" << std::endl << std::endl; - std::cout << "Load tflite model by Loader and TFLite and compare their output" << std::endl; + std::cout << "tflite_comparator" << std::endl << std::endl; + std::cout << "Load tflite model by onert and TFLite, and compare their output" << std::endl; std::cout << "Usage:" << std::endl; std::cout << argv[0] << " --tflite model_file.tflite --data input_data.dat" << std::endl; std::cout << _options; diff --git a/tests/tools/tflite_loader/src/args.h b/tests/tools/tflite_comparator/src/args.h index 
4d0e8ff41..4d0e8ff41 100644
--- a/tests/tools/tflite_loader/src/args.h
+++ b/tests/tools/tflite_comparator/src/args.h
diff --git a/tests/tools/tflite_comparator/src/tflite_comparator.cc b/tests/tools/tflite_comparator/src/tflite_comparator.cc
new file mode 100644
index 000000000..383a4e4de
--- /dev/null
+++ b/tests/tools/tflite_comparator/src/tflite_comparator.cc
@@ -0,0 +1,398 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "args.h"
+
+#include <nnfw_experimental.h>
+#include <nnfw_internal.h>
+
+#include <misc/EnvVar.h>
+#include <misc/fp32.h>
+#include <misc/RandomGenerator.h>
+
+#include <tflite/Assert.h>
+#include <tflite/InterpreterSession.h>
+
+#include <cassert>
+#include <cmath>
+#include <cstring>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+const int RUN_FAILED = 1;
+const int FILE_ERROR = 2;
+
+using namespace nnfw::tflite;
+
+#define NNFW_ASSERT_FAIL(expr, msg)     \
+  do                                    \
+  {                                     \
+    if ((expr) != NNFW_STATUS_NO_ERROR) \
+    {                                   \
+      std::cerr << msg << std::endl;    \
+      exit(-1);                         \
+    }                                   \
+  } while (0)
+
+// Read raw bytes from the selected file into an already-sized buffer
+void readData(const std::string &path, std::vector<uint8_t> &dest)
+{
+  std::ifstream in(path);
+  if (!in.good())
+  {
+    std::cerr << "can not open data file " << path << "\n";
+    exit(FILE_ERROR);
+  }
+  in.seekg(0, std::ifstream::end);
+  size_t len = in.tellg();
+  in.seekg(0, std::ifstream::beg);
+
+  assert(dest.size() == len);
+  in.read(reinterpret_cast<char *>(dest.data()), len);
+}
+
+template <typename T>
+void randomData(nnfw::misc::RandomGenerator &randgen, std::vector<uint8_t> &dest)
+{
+  size_t elements = dest.size() / sizeof(T);
+  assert(dest.size() % sizeof(T) == 0);
+
+  std::vector<T> vec(elements);
+  for (uint64_t i = 0; i < elements; i++)
+  {
+    vec[i] = randgen.generate<T>();
+  }
+  memcpy(dest.data(), vec.data(), elements * sizeof(T));
+}
+
+void randomBoolData(nnfw::misc::RandomGenerator &randgen, std::vector<uint8_t> &dest)
+{
+  size_t elements = dest.size();
+  for (uint64_t i = 0; i < elements; i++)
+  {
+    bool value = randgen.generate<bool>();
+    dest[i] = value ? 1 : 0;
+  }
+}
+
+inline uint64_t num_elems(const nnfw_tensorinfo *ti)
+{
+  uint64_t n = 1;
+  for (uint32_t i = 0; i < ti->rank; ++i)
+  {
+    n *= ti->dims[i];
+  }
+  return n;
+}
+
+inline size_t sizeOfNnfwType(NNFW_TYPE type)
+{
+  switch (type)
+  {
+    case NNFW_TYPE_TENSOR_BOOL:
+    case NNFW_TYPE_TENSOR_UINT8:
+    case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
+    case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
+      return 1;
+    case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
+      return 2;
+    case NNFW_TYPE_TENSOR_FLOAT32:
+    case NNFW_TYPE_TENSOR_INT32:
+      return 4;
+    case NNFW_TYPE_TENSOR_INT64:
+      return 8;
+    default:
+      throw std::runtime_error{"Invalid tensor type"};
+  }
+}
+
+template <typename T>
+bool isClose(const T *ref_buf, const std::vector<uint8_t> &act_buf, uint32_t index)
+{
+  // TODO better way for handling quant error?
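+  // Integer comparison: an element passes when it is bit-equal to the
+  // reference or when |ref - act| <= TOLERANCE, read from the environment
+  // (default 0, i.e. exact match). Every failing element is logged, and a
+  // single failure marks the whole output as mismatched.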
+ auto tolerance = static_cast<uint64_t>(nnfw::misc::EnvVar("TOLERANCE").asInt(0)); + bool match = true; + + for (uint32_t e = 0; e < act_buf.size() / sizeof(T); e++) + { + T ref = ref_buf[e]; + T act = reinterpret_cast<const T *>(act_buf.data())[e]; + uint64_t diff = static_cast<uint64_t>(((ref > act) ? (ref - act) : (act - ref))); + + if (ref != act && diff > tolerance) + { + std::cerr << "Output #" << index << ", Element Index : " << e << ", ref: " << ref + << ", act: " << act << " (diff: " << diff << ")" << std::endl; + match = false; + } + } + + return match; +} + +template <> +bool isClose<float>(const float *ref_buf, const std::vector<uint8_t> &act_buf, uint32_t index) +{ + uint32_t tolerance = nnfw::misc::EnvVar("TOLERANCE").asInt(1); + bool match = true; + + for (uint32_t e = 0; e < act_buf.size() / sizeof(float); e++) + { + float ref = ref_buf[e]; + float act = reinterpret_cast<const float *>(act_buf.data())[e]; + float diff = std::fabs(ref - act); + + bool match_elem = nnfw::misc::fp32::absolute_epsilon_equal(ref, act) + ? true + : nnfw::misc::fp32::epsilon_equal(ref, act, tolerance); + + if (!match_elem) + { + std::cerr << "Output #" << index << ", Element Index : " << e << ", ref: " << ref + << ", act: " << act << " (diff: " << diff << ")" << std::endl; + match = false; + } + } + + return match; +} + +bool exact(const uint8_t *ref_buf, const std::vector<uint8_t> &act_buf, uint32_t index) +{ + bool match = true; + for (uint32_t e = 0; e < act_buf.size() / sizeof(uint8_t); e++) + { + uint8_t ref_raw = ref_buf[e]; + bool ref = (ref_raw != 0 ? true : false); + uint8_t act_raw = reinterpret_cast<const uint8_t *>(act_buf.data())[e]; + bool act = (act_raw != 0 ? true : false); + if (ref != act) + { + std::cerr << "Output #" << index << ", Element Index : " << e << ", ref: " << ref + << ", act: " << act << std::endl; + match = false; + } + } + + return match; +} + +int main(const int argc, char **argv) +{ + TFLiteRun::Args args(argc, argv); + + auto tflite_file = args.getTFLiteFilename(); + auto data_files = args.getDataFilenames(); + + if (tflite_file.empty()) + { + args.print(argv); + return RUN_FAILED; + } + + std::cout << "[Execution] Stage start!" << std::endl; + // Loading + nnfw_session *onert_session = nullptr; + NNFW_ASSERT_FAIL(nnfw_create_session(&onert_session), "[ ERROR ] Failure during model load"); + if (onert_session == nullptr) + { + std::cerr << "[ ERROR ] Failure to open session" << std::endl; + exit(-1); + } + + NNFW_ASSERT_FAIL(nnfw_load_model_from_modelfile(onert_session, tflite_file.c_str()), + "[ ERROR ] Failure during model load"); + + uint32_t num_inputs; + uint32_t num_outputs; + NNFW_ASSERT_FAIL(nnfw_input_size(onert_session, &num_inputs), + "[ ERROR ] Failure during get model inputs"); + NNFW_ASSERT_FAIL(nnfw_output_size(onert_session, &num_outputs), + "[ ERROR ] Failure during get model outputs"); + + std::cout << "[Execution] Model is deserialized!" << std::endl; + + // Compile + nnfw_prepare(onert_session); + + std::cout << "[Execution] Model compiled!" << std::endl; + + // Prepare input/output data + std::vector<std::vector<uint8_t>> inputs(num_inputs); + std::vector<std::vector<uint8_t>> outputs(num_outputs); + + bool generate_data = data_files.empty(); + bool read_data = data_files.size() == num_inputs; + if (!generate_data && !read_data) + { + std::cerr << "[ ERROR ] " + << "Wrong number of input files." 
              << std::endl;
+    exit(1);
+  }
+
+  const int seed = 1; /* TODO Add an option for seed value */
+  nnfw::misc::RandomGenerator randgen{seed, 0.0f, 2.0f};
+
+  for (uint32_t i = 0; i < num_inputs; i++)
+  {
+    nnfw_tensorinfo ti_input;
+    NNFW_ASSERT_FAIL(nnfw_input_tensorinfo(onert_session, i, &ti_input),
+                     "[ ERROR ] Failure during get input data info");
+    size_t input_size = num_elems(&ti_input) * sizeOfNnfwType(ti_input.dtype);
+
+    inputs[i].resize(input_size);
+
+    if (generate_data)
+    {
+      switch (ti_input.dtype)
+      {
+        case NNFW_TYPE_TENSOR_BOOL:
+          randomBoolData(randgen, inputs[i]);
+          break;
+        case NNFW_TYPE_TENSOR_UINT8:
+        case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
+          randomData<uint8_t>(randgen, inputs[i]);
+          break;
+        case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
+          randomData<int8_t>(randgen, inputs[i]);
+          break;
+        case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
+          randomData<int16_t>(randgen, inputs[i]);
+          break;
+        case NNFW_TYPE_TENSOR_FLOAT32:
+          randomData<float>(randgen, inputs[i]);
+          break;
+        case NNFW_TYPE_TENSOR_INT32:
+          randomData<int32_t>(randgen, inputs[i]);
+          break;
+        case NNFW_TYPE_TENSOR_INT64:
+          randomData<uint64_t>(randgen, inputs[i]);
+          break;
+        default:
+          std::cerr << "[ ERROR ] "
+                    << "Unsupported input data type" << std::endl;
+          exit(-1);
+          break;
+      }
+    }
+    else /* read_data */
+      readData(data_files[i], inputs[i]);
+
+    NNFW_ASSERT_FAIL(nnfw_set_input(onert_session, i, ti_input.dtype, inputs[i].data(), input_size),
+                     "[ ERROR ] Failure to set input tensor buffer");
+  }
+
+  std::cout << "[Execution] Input data is defined!" << std::endl;
+
+  for (uint32_t i = 0; i < num_outputs; i++)
+  {
+    nnfw_tensorinfo ti_output;
+    NNFW_ASSERT_FAIL(nnfw_output_tensorinfo(onert_session, i, &ti_output),
+                     "[ ERROR ] Failure during get output tensor info");
+
+    uint64_t output_elements = num_elems(&ti_output);
+    size_t output_size = output_elements * sizeOfNnfwType(ti_output.dtype);
+    outputs[i].resize(output_size);
+
+    NNFW_ASSERT_FAIL(
+      nnfw_set_output(onert_session, i, ti_output.dtype, outputs[i].data(), output_size),
+      "[ ERROR ] Failure to set output tensor buffer");
+  }
+
+  // Execute
+  NNFW_ASSERT_FAIL(nnfw_run(onert_session), "[Execution] Can't execute");
+
+  std::cout << "[Execution] Done!" << std::endl;
+
+  // Compare with tflite
+  std::cout << "[Comparison] Stage start!" << std::endl;
+  // Read tflite model
+  auto model = TfLiteModelCreateFromFile(tflite_file.c_str());
+  auto options = TfLiteInterpreterOptionsCreate();
+  TfLiteInterpreterOptionsSetNumThreads(options, nnfw::misc::EnvVar("THREAD").asInt(1));
+  auto interpreter = TfLiteInterpreterCreate(model, options);
+
+  auto sess = std::make_shared<nnfw::tflite::InterpreterSession>(interpreter);
+  sess->prepare();
+  // Set input and run
+  for (uint32_t i = 0; i < num_inputs; i++)
+  {
+    auto input_tensor = TfLiteInterpreterGetInputTensor(interpreter, i);
+    memcpy(TfLiteTensorData(input_tensor), inputs[i].data(), inputs[i].size());
+  }
+  if (!sess->run())
+  {
+    std::cout << "[Comparison] TFLite run failed!" << std::endl;
+    assert(0 && "Run failed!");
+  }
+  std::cout << "[Comparison] TFLite run done!"
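+  // Compare each onert output buffer against the corresponding TFLite output
+  // tensor: BOOL outputs are normalized to 0/1 and compared exactly; all other
+  // types go through the type-dispatched isClose<T>() helpers defined above.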
<< std::endl; + + bool find_unmatched_output = false; + + for (uint32_t out_idx = 0; out_idx < num_outputs; out_idx++) + { + nnfw_tensorinfo ti; + nnfw_output_tensorinfo(onert_session, out_idx, &ti); + + bool matched = true; + // Check output tensor values + auto output_tensor = TfLiteInterpreterGetOutputTensor(interpreter, out_idx); + auto ref_output = TfLiteTensorData(output_tensor); + const auto &output = outputs[out_idx]; + + switch (ti.dtype) + { + case NNFW_TYPE_TENSOR_BOOL: + matched = exact(reinterpret_cast<uint8_t *>(ref_output), output, out_idx); + break; + case NNFW_TYPE_TENSOR_UINT8: + case NNFW_TYPE_TENSOR_QUANT8_ASYMM: + matched = isClose<uint8_t>(reinterpret_cast<uint8_t *>(ref_output), output, out_idx); + break; + case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED: + matched = isClose<int8_t>(reinterpret_cast<int8_t *>(ref_output), output, out_idx); + break; + case NNFW_TYPE_TENSOR_INT32: + matched = isClose<int32_t>(reinterpret_cast<int32_t *>(ref_output), output, out_idx); + break; + case NNFW_TYPE_TENSOR_FLOAT32: + matched = isClose<float>(reinterpret_cast<float *>(ref_output), output, out_idx); + break; + case NNFW_TYPE_TENSOR_INT64: + matched = isClose<int64_t>(reinterpret_cast<int64_t *>(ref_output), output, out_idx); + break; + default: + throw std::runtime_error{"Invalid tensor type"}; + } + + if (!matched) + find_unmatched_output = true; + } + + // Print results + int ret = 0; + if (find_unmatched_output) + { + std::cout << "[Comparison] outputs is not equal!" << std::endl; + ret = 1; + } + else + { + std::cout << "[Comparison] Outputs is equal!" << std::endl; + } + std::cout << "[Comparison] Done!" << std::endl; + + nnfw_close_session(onert_session); + + return ret; +} diff --git a/tests/tools/tflite_loader/CMakeLists.txt b/tests/tools/tflite_loader/CMakeLists.txt deleted file mode 100644 index 0fe1c69de..000000000 --- a/tests/tools/tflite_loader/CMakeLists.txt +++ /dev/null @@ -1,23 +0,0 @@ -if(NOT BUILD_TFLITE_LOADER_TEST_TOOL) - message("skipping tflite loader tool build") - return() -endif(NOT BUILD_TFLITE_LOADER_TEST_TOOL) - -if(NOT BUILD_ONERT) - message("skipping tflite loader tool build: onert is not built") - return() -endif(NOT BUILD_ONERT) - -list(APPEND SOURCES "src/tflite_loader.cc") -list(APPEND SOURCES "src/args.cc") - -nnfw_find_package(Boost REQUIRED program_options system filesystem) - -add_executable(tflite_loader_test_tool ${SOURCES}) -target_include_directories(tflite_loader_test_tool PRIVATE ${Boost_INCLUDE_DIRS}) - -target_link_libraries(tflite_loader_test_tool onert_core onert tflite_loader) -target_link_libraries(tflite_loader_test_tool nnfw_lib_tflite nnfw_lib_misc) -target_link_libraries(tflite_loader_test_tool ${Boost_PROGRAM_OPTIONS_LIBRARY} ${Boost_SYSTEM_LIBRARY} ${Boost_FILESYSTEM_LIBRARY}) - -install(TARGETS tflite_loader_test_tool DESTINATION bin) diff --git a/tests/tools/tflite_loader/src/tflite_loader.cc b/tests/tools/tflite_loader/src/tflite_loader.cc deleted file mode 100644 index ce099210b..000000000 --- a/tests/tools/tflite_loader/src/tflite_loader.cc +++ /dev/null @@ -1,311 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "tflite/ext/kernels/register.h" - -#include "args.h" -#include "tflite/InterpreterSession.h" -#include "tflite/Assert.h" -#include "tflite/Diff.h" -#include "misc/tensor/IndexIterator.h" - -#include <iostream> -#include <fstream> - -#include "compiler/Compiler.h" -#include "exec/Execution.h" -#include "ir/Graph.h" - -#include "tflite_loader.h" - -#include <memory> - -const int RUN_FAILED = 1; - -using namespace tflite; -using namespace nnfw::tflite; - -const int FILE_ERROR = 2; -const float DIFFERENCE_THRESHOLD = 10e-5; - -// Read vector of floats from selected file -std::vector<float> readData(const string &path) -{ - std::ifstream in(path); - if (!in.good()) - { - std::cerr << "can not open data file " << path << "\n"; - exit(FILE_ERROR); - } - in.seekg(0, std::ifstream::end); - size_t len = in.tellg(); - in.seekg(0, std::ifstream::beg); - assert(len % sizeof(float) == 0); - size_t size = len / sizeof(float); - std::vector<float> vec(size); - for (size_t i = 0; i < size; ++i) - { - in.read(reinterpret_cast<char *>(&vec[i]), sizeof(float)); - } - return vec; -} - -std::vector<float> randomData(nnfw::misc::RandomGenerator &randgen, const uint64_t size) -{ - std::vector<float> vec(size); - for (uint64_t i = 0; i < size; i++) - { - vec[i] = randgen.generate<float>(); - } - return vec; -} - -void executeGraph(const std::shared_ptr<onert::ir::Graph> &g, - const std::vector<std::vector<float>> &inputs, - std::vector<std::vector<float>> &outputs) -{ - auto subgs = std::make_shared<onert::ir::Subgraphs>(); - subgs->push(onert::ir::SubgraphIndex{0}, g); - auto compiler = new onert::compiler::Compiler(subgs); - std::shared_ptr<onert::exec::ExecutorMap> executors; - // Compilation - try - { - executors = compiler->compile(); - } - catch (const std::exception &e) - { - std::cerr << "[Execution] Can't compile model" << std::endl; - std::cerr << e.what() << std::endl; - exit(-1); - } - - std::cout << "[Execution] Graph compiled!" 
<< std::endl; - - auto execution = std::make_shared<onert::exec::Execution>(executors); - - // Setting IO - try - { - // Verify input shapes - auto num_inputs = inputs.size(); - for (size_t i = 0; i < num_inputs; i++) - { - auto input_operand_idx = g->getInputs().at(i); - auto input_shape = g->operands().at(input_operand_idx).shape(); - assert(inputs[i].size() == input_shape.num_elements()); - } - - // Set output shapes - auto num_outputs = g->getOutputs().size(); - outputs.resize(num_outputs); - for (uint32_t i = 0; i < num_outputs; i++) - { - auto output_operand_idx = g->getOutputs().at(i); - auto output_shape = g->operands().at(output_operand_idx).shape(); - outputs[i].resize(output_shape.num_elements()); - } - - for (size_t i = 0; i < num_inputs; i++) - execution->setInput(onert::ir::IOIndex(i), inputs[i].data(), - inputs[i].size() * sizeof(float)); - for (uint32_t i = 0; i < num_outputs; i++) - execution->setOutput(onert::ir::IOIndex(i), outputs[i].data(), - outputs[i].size() * sizeof(float)); - } - catch (const std::exception &e) - { - std::cerr << "[Execution] Can't set model IO" << std::endl; - std::cerr << e.what() << '\n'; - exit(-1); - } - - try - { - execution->execute(); - } - catch (const std::exception &e) - { - std::cerr << "[Execution] Can't execute" << std::endl; - std::cerr << e.what() << '\n'; - exit(-1); - } - - std::cout << "[Execution] Done!" << std::endl; - - delete compiler; -} - -int main(const int argc, char **argv) -{ - TFLiteRun::Args args(argc, argv); - - auto tflite_file = args.getTFLiteFilename(); - auto data_files = args.getDataFilenames(); - - if (tflite_file.empty()) - { - args.print(argv); - return RUN_FAILED; - } - - std::cout << "[Execution] Stage start!" << std::endl; - std::shared_ptr<onert::ir::Graph> test_graph; - // Loading - try - { - test_graph = - onert::tflite_loader::loadModel(tflite_file.c_str())->at(onert::ir::SubgraphIndex{0}); - } - catch (std::exception &e) - { - std::cerr << "[ ERROR ] " - << "Failure during model load" << std::endl; - std::cerr << e.what() << std::endl; - exit(-1); - } - - // TODO: Support another input/output types - for (const auto &input_idx : test_graph->getInputs()) - { - const auto input_type = test_graph->operands().at(input_idx).typeInfo().type(); - assert(input_type == onert::ir::DataType::FLOAT32 && "Only FLOAT32 inputs are supported"); - } - for (const auto &output_idx : test_graph->getOutputs()) - { - const auto output_type = test_graph->operands().at(output_idx).typeInfo().type(); - assert(output_type == onert::ir::DataType::FLOAT32 && "Only FLOAT32 outputs are supported"); - } - - std::cout << "[Execution] Model is deserialized!" << std::endl; - auto num_inputs = test_graph->getInputs().size(); - std::vector<std::vector<float>> inputs(num_inputs); - bool generate_data = data_files.empty(); - bool read_data = data_files.size() == num_inputs; - if (num_inputs == 0) - { - std::cerr << "[ ERROR ] " - << "No inputs in model => execution is not possible" << std::endl; - exit(1); - } - if (!generate_data && !read_data) - { - std::cerr << "[ ERROR ] " - << "Wrong number of input files." 
<< std::endl; - exit(1); - } - - const int seed = 1; /* TODO Add an option for seed value */ - nnfw::misc::RandomGenerator randgen{seed, 0.0f, 2.0f}; - try - { - for (uint32_t i = 0; i < num_inputs; i++) - { - if (generate_data) - { - uint64_t sz = - test_graph->operands().at(test_graph->getInputs().at(i)).shape().num_elements(); - inputs[i] = randomData(randgen, sz); - } - else /* read_data */ - inputs[i] = readData(data_files[i]); - } - } - catch (std::exception &e) - { - std::cerr << "[ ERROR ] " - << "Failure during input data generation" << std::endl; - std::cerr << e.what() << std::endl; - exit(-1); - } - - std::cout << "[Execution] Input data is defined!" << std::endl; - std::vector<std::vector<float>> outputs; - // Run graph - executeGraph(test_graph, inputs, outputs); - // Compare with tflite - std::cout << "[Comparison] Stage start!" << std::endl; - // Read tflite model - StderrReporter error_reporter; - auto model = FlatBufferModel::BuildFromFile(tflite_file.c_str(), &error_reporter); - - BuiltinOpResolver resolver; - InterpreterBuilder builder(*model, resolver); - - std::unique_ptr<Interpreter> interpreter; - try - { - TFLITE_ENSURE(builder(&interpreter)); - } - catch (const std::exception &e) - { - std::cerr << e.what() << std::endl; - exit(FILE_ERROR); - } - interpreter->SetNumThreads(2); - - auto sess = std::make_shared<nnfw::tflite::InterpreterSession>(interpreter.get()); - sess->prepare(); - // Set input and run - for (uint32_t i = 0; i < num_inputs; i++) - { - auto input_tensor = interpreter->tensor(interpreter->inputs().at(i)); - memcpy(input_tensor->data.f, inputs[i].data(), inputs[i].size() * sizeof(float)); - } - if (!sess->run()) - { - std::cout << "[Comparison] TFLite run failed!" << std::endl; - assert(0 && "Run failed!"); - } - std::cout << "[Comparison] TFLite run done!" << std::endl; - - // Calculate max difference over all outputs - float max_difference = 0.0f; - auto num_outputs = test_graph->getOutputs().size(); - for (uint32_t out_idx = 0; out_idx < num_outputs; out_idx++) - { - const auto &tflite_output_tensor = interpreter->tensor(interpreter->outputs().at(out_idx)); - const auto &nnfw_output_tensor = outputs[out_idx]; - - if (nnfw_output_tensor.size() != tflite_output_tensor->bytes / sizeof(float)) - std::cout << "[Comparison] Different size of outputs!" << std::endl; - // Check max difference - float *tflite_out_ptr = tflite_output_tensor->data.f; - for (const auto &nnfw_out : nnfw_output_tensor) - { - if (std::abs(nnfw_out - *tflite_out_ptr) > max_difference) - max_difference = std::abs(nnfw_out - *tflite_out_ptr); - - tflite_out_ptr++; - } - } - - // Print results - std::cout << "[Comparison] Max difference: " << max_difference << std::endl; - int ret = 0; - if (max_difference > DIFFERENCE_THRESHOLD) - { - std::cout << "[Comparison] Outputs is not equal!" << std::endl; - ret = 1; - } - else - { - std::cout << "[Comparison] Outputs is equal!" << std::endl; - } - std::cout << "[Comparison] Done!" 
<< std::endl; - - return ret; -} diff --git a/tests/tools/tflite_run/CMakeLists.txt b/tests/tools/tflite_run/CMakeLists.txt index 3f30d3e32..bbe199294 100644 --- a/tests/tools/tflite_run/CMakeLists.txt +++ b/tests/tools/tflite_run/CMakeLists.txt @@ -32,4 +32,4 @@ add_executable(tflite_test src/tflite_test.cc) ## Link test executable against gtest & gtest_main target_link_libraries(tflite_test gtest gtest_main ${LIB_PTHREAD}) ## install test binary for packaging -install(TARGETS tflite_test DESTINATION unittest_standalone) +install(TARGETS tflite_test DESTINATION unittest) diff --git a/tests/tools/tflite_run/src/bin_image.cc b/tests/tools/tflite_run/src/bin_image.cc index 16d4c94f7..fadece045 100644 --- a/tests/tools/tflite_run/src/bin_image.cc +++ b/tests/tools/tflite_run/src/bin_image.cc @@ -20,7 +20,7 @@ #include "bin_image.h" BinImage::BinImage(unsigned int width, unsigned int height, unsigned int channels) - : _width(width), _height(height), _channels(channels) + : _width(width), _height(height), _channels(channels) { } diff --git a/tests/tools/tflite_run/src/tensor_dumper.cc b/tests/tools/tflite_run/src/tensor_dumper.cc index 4ccd4e11a..86d37de5d 100644 --- a/tests/tools/tflite_run/src/tensor_dumper.cc +++ b/tests/tools/tflite_run/src/tensor_dumper.cc @@ -20,7 +20,7 @@ #include <iostream> #include <cstring> -#include "tensorflow/lite/interpreter.h" +#include <tensorflow/lite/c/c_api.h> namespace TFLiteRun { @@ -30,16 +30,31 @@ TensorDumper::TensorDumper() // DO NOTHING } -void TensorDumper::addTensors(tflite::Interpreter &interpreter, const std::vector<int> &indices) +void TensorDumper::addInputTensors(TfLiteInterpreter &interpreter) { - for (const auto &o : indices) + auto const input_count = TfLiteInterpreterGetInputTensorCount(&interpreter); + for (int32_t idx = 0; idx < input_count; idx++) { - const TfLiteTensor *tensor = interpreter.tensor(o); - int size = tensor->bytes; + const TfLiteTensor *tensor = TfLiteInterpreterGetInputTensor(&interpreter, idx); + auto size = TfLiteTensorByteSize(tensor); std::vector<char> buffer; buffer.resize(size); - memcpy(buffer.data(), tensor->data.raw, size); - _tensors.emplace_back(o, std::move(buffer)); + memcpy(buffer.data(), TfLiteTensorData(tensor), size); + _input_tensors.emplace_back(idx, std::move(buffer)); + } +} + +void TensorDumper::addOutputTensors(TfLiteInterpreter &interpreter) +{ + auto const output_count = TfLiteInterpreterGetOutputTensorCount(&interpreter); + for (int32_t idx = 0; idx < output_count; idx++) + { + const TfLiteTensor *tensor = TfLiteInterpreterGetOutputTensor(&interpreter, idx); + auto size = TfLiteTensorByteSize(tensor); + std::vector<char> buffer; + buffer.resize(size); + memcpy(buffer.data(), TfLiteTensorData(tensor), size); + _output_tensors.emplace_back(idx, std::move(buffer)); } } @@ -49,17 +64,30 @@ void TensorDumper::dump(const std::string &filename) const std::ofstream file(filename, std::ios::out | std::ios::binary); // Write number of tensors - uint32_t num_tensors = static_cast<uint32_t>(_tensors.size()); + uint32_t num_tensors = + static_cast<uint32_t>(_input_tensors.size()) + static_cast<uint32_t>(_output_tensors.size()); file.write(reinterpret_cast<const char *>(&num_tensors), sizeof(num_tensors)); - // Write tensor indices - for (const auto &t : _tensors) + // Write input tensor indices + for (const auto &t : _input_tensors) { file.write(reinterpret_cast<const char *>(&t._index), sizeof(int)); } - // Write data - for (const auto &t : _tensors) + // Write output tensor indices + for (const auto &t : 
_output_tensors) + { + file.write(reinterpret_cast<const char *>(&t._index), sizeof(int)); + } + + // Write input data + for (const auto &t : _input_tensors) + { + file.write(t._data.data(), t._data.size()); + } + + // Write output data + for (const auto &t : _output_tensors) { file.write(t._data.data(), t._data.size()); } diff --git a/tests/tools/tflite_run/src/tensor_dumper.h b/tests/tools/tflite_run/src/tensor_dumper.h index 5fdcc54f7..5847c3971 100644 --- a/tests/tools/tflite_run/src/tensor_dumper.h +++ b/tests/tools/tflite_run/src/tensor_dumper.h @@ -17,6 +17,8 @@ #ifndef __TFLITE_RUN_TENSOR_DUMPER_H__ #define __TFLITE_RUN_TENSOR_DUMPER_H__ +#include <tensorflow/lite/c/c_api.h> + #include <memory> #include <string> #include <vector> @@ -42,11 +44,13 @@ private: public: TensorDumper(); - void addTensors(tflite::Interpreter &interpreter, const std::vector<int> &indices); + void addInputTensors(TfLiteInterpreter &interpreter); + void addOutputTensors(TfLiteInterpreter &interpreter); void dump(const std::string &filename) const; private: - std::vector<Tensor> _tensors; + std::vector<Tensor> _input_tensors; + std::vector<Tensor> _output_tensors; }; } // end of namespace TFLiteRun diff --git a/tests/tools/tflite_run/src/tensor_loader.cc b/tests/tools/tflite_run/src/tensor_loader.cc index 93d9e2f54..ebd64470d 100644 --- a/tests/tools/tflite_run/src/tensor_loader.cc +++ b/tests/tools/tflite_run/src/tensor_loader.cc @@ -18,6 +18,7 @@ #include <assert.h> +#include <cstring> #include <fstream> #include "misc/tensor/Shape.h" @@ -25,8 +26,8 @@ namespace TFLiteRun { -TensorLoader::TensorLoader(tflite::Interpreter &interpreter) - : _interpreter(interpreter), _raw_data(nullptr) +TensorLoader::TensorLoader(TfLiteInterpreter &interpreter) + : _interpreter(interpreter), _raw_data(nullptr) { } @@ -42,21 +43,20 @@ void TensorLoader::loadDumpedTensors(const std::string &filename) int tensor_indices_raw[num_tensors]; file.read(reinterpret_cast<char *>(tensor_indices_raw), sizeof(tensor_indices_raw)); - std::vector<int> tensor_indices(tensor_indices_raw, tensor_indices_raw + num_tensors); _raw_data = std::unique_ptr<float[]>(new float[file_size]); file.read(reinterpret_cast<char *>(_raw_data.get()), file_size); file.close(); - size_t read_bytes = loadTensorsFromRawData(tensor_indices); + size_t read_bytes = loadInputTensorsFromRawData(); + read_bytes += loadOutputTensorsFromRawData(); // The file size and total output tensor size must match assert(file_size == sizeof(num_tensors) + sizeof(tensor_indices_raw) + read_bytes * sizeof(float)); } -void TensorLoader::loadRawTensors(const std::string &filename, - const std::vector<int> &tensor_indices) +void TensorLoader::loadRawInputTensors(const std::string &filename) { // TODO Handle file open/read error std::ifstream file(filename, std::ios::ate | std::ios::binary); @@ -67,41 +67,74 @@ void TensorLoader::loadRawTensors(const std::string &filename, file.read(reinterpret_cast<char *>(_raw_data.get()), file_size); file.close(); - size_t read_bytes = loadTensorsFromRawData(tensor_indices); + size_t read_bytes = loadInputTensorsFromRawData(); // The file size and total output tensor size must match assert(file_size == read_bytes * sizeof(float)); } -size_t TensorLoader::loadTensorsFromRawData(const std::vector<int> &tensor_indices) +size_t TensorLoader::loadInputTensorsFromRawData() { size_t offset = 0; - for (const auto &o : tensor_indices) + auto const input_count = TfLiteInterpreterGetInputTensorCount(&_interpreter); + for (auto idx = 0; idx < input_count; idx++) 
{ - const TfLiteTensor *tensor = _interpreter.tensor(o); + const TfLiteTensor *tensor = TfLiteInterpreterGetInputTensor(&_interpreter, idx); // Convert tensor shape to `Shape` from `tensor->dims` - nnfw::misc::tensor::Shape shape(static_cast<size_t>(tensor->dims->size)); - for (int d = 0; d < tensor->dims->size; d++) + nnfw::misc::tensor::Shape shape(TfLiteTensorNumDims(tensor)); + for (int32_t d = 0; d < TfLiteTensorNumDims(tensor); d++) { - shape.dim(d) = tensor->dims->data[d]; + shape.dim(d) = TfLiteTensorDim(tensor, d); } float *base = _raw_data.get() + offset; - assert(tensor->bytes % sizeof(float) == 0); - offset += (tensor->bytes / sizeof(float)); + assert(TfLiteTensorByteSize(tensor) % sizeof(float) == 0); + offset += (TfLiteTensorByteSize(tensor) / sizeof(float)); - _tensor_map.insert(std::make_pair(o, nnfw::tflite::TensorView<float>(shape, base))); + _input_tensor_map.emplace(idx, nnfw::tflite::TensorView<float>(shape, base)); + + memcpy(TfLiteTensorData(tensor), reinterpret_cast<const void *>(base), + TfLiteTensorByteSize(tensor)); + } + + return offset; +} + +size_t TensorLoader::loadOutputTensorsFromRawData() +{ + size_t offset = 0; + auto const output_count = TfLiteInterpreterGetOutputTensorCount(&_interpreter); + for (auto idx = 0; idx < output_count; idx++) + { + const TfLiteTensor *tensor = TfLiteInterpreterGetOutputTensor(&_interpreter, idx); + + // Convert tensor shape to `Shape` from `tensor->dims` + nnfw::misc::tensor::Shape shape(TfLiteTensorNumDims(tensor)); + for (int32_t d = 0; d < TfLiteTensorNumDims(tensor); d++) + { + shape.dim(d) = TfLiteTensorDim(tensor, d); + } + + float *base = _raw_data.get() + offset; + + assert(TfLiteTensorByteSize(tensor) % sizeof(float) == 0); + offset += (TfLiteTensorByteSize(tensor) / sizeof(float)); + + _output_tensor_map.emplace(idx, nnfw::tflite::TensorView<float>(shape, base)); + + memcpy(TfLiteTensorData(tensor), reinterpret_cast<const void *>(base), + TfLiteTensorByteSize(tensor)); } return offset; } -const nnfw::tflite::TensorView<float> &TensorLoader::get(int tensor_idx) const +const nnfw::tflite::TensorView<float> &TensorLoader::getOutput(int tensor_idx) const { - auto found = _tensor_map.find(tensor_idx); - assert(found != _tensor_map.end()); + auto found = _output_tensor_map.find(tensor_idx); + assert(found != _output_tensor_map.end()); return found->second; } diff --git a/tests/tools/tflite_run/src/tensor_loader.h b/tests/tools/tflite_run/src/tensor_loader.h index ef51e0fd4..b9e6b72cb 100644 --- a/tests/tools/tflite_run/src/tensor_loader.h +++ b/tests/tools/tflite_run/src/tensor_loader.h @@ -17,13 +17,14 @@ #ifndef __TFLITE_RUN_TENSOR_LOADER_H__ #define __TFLITE_RUN_TENSOR_LOADER_H__ +#include "tflite/TensorView.h" + #include <sys/mman.h> +#include <memory> #include <string> #include <unordered_map> -#include "tflite/TensorView.h" - namespace tflite { class Interpreter; @@ -35,17 +36,18 @@ namespace TFLiteRun class TensorLoader { public: - TensorLoader(tflite::Interpreter &interpreter); + TensorLoader(TfLiteInterpreter &interpreter); void loadDumpedTensors(const std::string &filename); - void loadRawTensors(const std::string &filename, const std::vector<int> &tensor_indices); - const nnfw::tflite::TensorView<float> &get(int tensor_idx) const; - size_t getNums() const { return _tensor_map.size(); } + void loadRawInputTensors(const std::string &filename); + const nnfw::tflite::TensorView<float> &getOutput(int tensor_idx) const; private: - size_t loadTensorsFromRawData(const std::vector<int> &tensor_indices); - 
tflite::Interpreter &_interpreter; + size_t loadInputTensorsFromRawData(); + size_t loadOutputTensorsFromRawData(); + TfLiteInterpreter &_interpreter; std::unique_ptr<float[]> _raw_data; - std::unordered_map<int, nnfw::tflite::TensorView<float>> _tensor_map; + std::unordered_map<int, nnfw::tflite::TensorView<float>> _input_tensor_map; + std::unordered_map<int, nnfw::tflite::TensorView<float>> _output_tensor_map; }; } // end of namespace TFLiteRun diff --git a/tests/tools/tflite_run/src/tflite_run.cc b/tests/tools/tflite_run/src/tflite_run.cc index e72966db5..a1e3d2eb6 100644 --- a/tests/tools/tflite_run/src/tflite_run.cc +++ b/tests/tools/tflite_run/src/tflite_run.cc @@ -14,9 +14,6 @@ * limitations under the License. */ -#include "tflite/ext/kernels/register.h" -#include "tensorflow/lite/model.h" - #include "args.h" #include "tensor_dumper.h" #include "tensor_loader.h" @@ -26,8 +23,8 @@ #include "tflite/Diff.h" #include "tflite/Assert.h" #include "tflite/Session.h" +#include "tflite/RandomInputInitializer.h" #include "tflite/InterpreterSession.h" -#include "tflite/NNAPISession.h" #include "misc/tensor/IndexIterator.h" #include "misc/tensor/Object.h" #include "benchmark.h" @@ -54,31 +51,10 @@ void print_max_idx(float *f, int size) static const char *default_backend_cand = "tflite_cpu"; -// Verifies whether the model is a flatbuffer file. -class BMFlatBufferVerifier : public tflite::TfLiteVerifier -{ -public: - bool Verify(const char *data, int length, tflite::ErrorReporter *reporter) override - { - - flatbuffers::Verifier verifier(reinterpret_cast<const uint8_t *>(data), length); - if (!tflite::VerifyModelBuffer(verifier)) - { - reporter->Report("The model is not a valid Flatbuffer file"); - return false; - } - return true; - } -}; - -} // namespace anonymous +} // namespace int main(const int argc, char **argv) { - const bool use_nnapi = nnfw::misc::EnvVar("USE_NNAPI").asBool(false); - - StderrReporter error_reporter; - TFLiteRun::Args args(argc, argv); std::chrono::milliseconds t_model_load(0), t_prepare(0); @@ -86,33 +62,14 @@ int main(const int argc, char **argv) // TODO Apply verbose level to phases const int verbose = args.getVerboseLevel(); benchmark::Phases phases( - benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()}); + benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()}); - std::unique_ptr<FlatBufferModel> model; - std::unique_ptr<Interpreter> interpreter; - std::unique_ptr<tflite::TfLiteVerifier> verifier{new BMFlatBufferVerifier}; + TfLiteModel *model = nullptr; try { phases.run("MODEL_LOAD", [&](const benchmark::Phase &, uint32_t) { - if (args.getModelValidate()) - { - model = FlatBufferModel::VerifyAndBuildFromFile(args.getTFLiteFilename().c_str(), - verifier.get(), &error_reporter); - } - else - { - model = FlatBufferModel::BuildFromFile(args.getTFLiteFilename().c_str(), &error_reporter); - } - if (model == nullptr) - { - throw std::runtime_error{"Cannot create model"}; - } - - BuiltinOpResolver resolver; - InterpreterBuilder builder(*model, resolver); - TFLITE_ENSURE(builder(&interpreter)) - interpreter->SetNumThreads(nnfw::misc::EnvVar("THREAD").asInt(-1)); + model = TfLiteModelCreateFromFile(args.getTFLiteFilename().c_str()); }); } catch (const std::exception &e) @@ -121,17 +78,16 @@ int main(const int argc, char **argv) return 1; } - std::shared_ptr<nnfw::tflite::Session> sess; - - if (use_nnapi) - { - sess = std::make_shared<nnfw::tflite::NNAPISession>(interpreter.get()); - } - else + if (model 
== nullptr) { - sess = std::make_shared<nnfw::tflite::InterpreterSession>(interpreter.get()); + throw std::runtime_error{"Cannot create model"}; } + auto options = TfLiteInterpreterOptionsCreate(); + TfLiteInterpreterOptionsSetNumThreads(options, nnfw::misc::EnvVar("THREAD").asInt(1)); + + TfLiteInterpreter *interpreter = TfLiteInterpreterCreate(model, options); + auto sess = std::make_shared<nnfw::tflite::InterpreterSession>(interpreter); try { phases.run("PREPARE", [&](const benchmark::Phase &, uint32_t) { sess->prepare(); }); @@ -144,27 +100,28 @@ int main(const int argc, char **argv) if (args.getInputShapes().size() != 0) { - const int dim_values = args.getInputShapes().size(); - int offset = 0; + const auto dim_values = args.getInputShapes().size(); + int32_t offset = 0; - for (const auto &id : interpreter->inputs()) + auto const input_count = TfLiteInterpreterGetInputTensorCount(interpreter); + for (int32_t id = 0; id < input_count; id++) { - TfLiteTensor *tensor = interpreter->tensor(id); + TfLiteTensor *tensor = TfLiteInterpreterGetInputTensor(interpreter, id); std::vector<int32_t> new_dim; - new_dim.resize(tensor->dims->size); + new_dim.resize(TfLiteTensorNumDims(tensor)); - for (uint32_t axis = 0; axis < tensor->dims->size; axis++, offset++) + for (int32_t axis = 0; axis < TfLiteTensorNumDims(tensor); axis++, offset++) { new_dim[axis] = - ((offset < dim_values) ? args.getInputShapes()[offset] : tensor->dims->data[axis]); + ((offset < dim_values) ? args.getInputShapes()[offset] : TfLiteTensorDim(tensor, axis)); } - interpreter->ResizeInputTensor(id, new_dim); + TfLiteInterpreterResizeInputTensor(interpreter, id, new_dim.data(), new_dim.size()); if (offset >= dim_values) break; } - interpreter->AllocateTensors(); + TfLiteInterpreterAllocateTensors(interpreter); } TFLiteRun::TensorLoader tensor_loader(*interpreter); @@ -175,101 +132,31 @@ int main(const int argc, char **argv) { if (!args.getInputFilename().empty()) { - tensor_loader.loadRawTensors(args.getInputFilename(), interpreter->inputs()); + tensor_loader.loadRawInputTensors(args.getInputFilename()); } else { tensor_loader.loadDumpedTensors(args.getCompareFilename()); } - - for (const auto &o : interpreter->inputs()) - { - const auto &tensor_view = tensor_loader.get(o); - TfLiteTensor *tensor = interpreter->tensor(o); - - memcpy(reinterpret_cast<void *>(tensor->data.f), - reinterpret_cast<const void *>(tensor_view._base), tensor->bytes); - } } else { const int seed = 1; /* TODO Add an option for seed value */ nnfw::misc::RandomGenerator randgen{seed, 0.0f, 2.0f}; - // No input specified. So we fill the input tensors with random values. - for (const auto &o : interpreter->inputs()) - { - TfLiteTensor *tensor = interpreter->tensor(o); - if (tensor->type == kTfLiteInt32) - { - // Generate singed 32-bit integer (s32) input - auto tensor_view = nnfw::tflite::TensorView<int32_t>::make(*interpreter, o); - - int32_t value = 0; - - nnfw::misc::tensor::iterate(tensor_view.shape()) - << [&](const nnfw::misc::tensor::Index &ind) { - // TODO Generate random values - // Gather operation: index should be within input coverage. 
- tensor_view.at(ind) = value; - value++; - }; - } - else if (tensor->type == kTfLiteUInt8) - { - // Generate unsigned 8-bit integer input - auto tensor_view = nnfw::tflite::TensorView<uint8_t>::make(*interpreter, o); - - auto fp = static_cast<uint8_t (nnfw::misc::RandomGenerator::*)( - const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>( - &nnfw::misc::RandomGenerator::generate<uint8_t>); - const nnfw::misc::tensor::Object<uint8_t> data(tensor_view.shape(), - std::bind(fp, randgen, _1, _2)); - - nnfw::misc::tensor::iterate(tensor_view.shape()) - << [&](const nnfw::misc::tensor::Index &ind) { - const auto value = data.at(ind); - tensor_view.at(ind) = value; - }; - } - else if (tensor->type == kTfLiteBool) - { - // Generate bool input - auto tensor_view = nnfw::tflite::TensorView<bool>::make(*interpreter, o); - - auto fp = static_cast<bool (nnfw::misc::RandomGenerator::*)( - const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>( - &nnfw::misc::RandomGenerator::generate<bool>); - const nnfw::misc::tensor::Object<bool> data(tensor_view.shape(), - std::bind(fp, randgen, _1, _2)); - - nnfw::misc::tensor::iterate(tensor_view.shape()) - << [&](const nnfw::misc::tensor::Index &ind) { - const auto value = data.at(ind); - tensor_view.at(ind) = value; - }; - } - else - { - assert(tensor->type == kTfLiteFloat32); - - const float *end = reinterpret_cast<const float *>(tensor->data.raw_const + tensor->bytes); - for (float *ptr = tensor->data.f; ptr < end; ptr++) - { - *ptr = randgen.generate<float>(); - } - } - } + RandomInputInitializer initializer{randgen}; + initializer.run(*interpreter); } TFLiteRun::TensorDumper tensor_dumper; // Must be called before `interpreter->Invoke()` - tensor_dumper.addTensors(*interpreter, interpreter->inputs()); + tensor_dumper.addInputTensors(*interpreter); std::cout << "input tensor indices = ["; - for (const auto &o : interpreter->inputs()) + auto const input_count = TfLiteInterpreterGetInputTensorCount(interpreter); + for (int32_t idx = 0; idx < input_count; idx++) { - std::cout << o << ","; + std::cout << idx << ","; } std::cout << "]" << std::endl; @@ -277,40 +164,42 @@ int main(const int argc, char **argv) // only warmup. if (verbose == 0) { - phases.run("WARMUP", [&](const benchmark::Phase &, uint32_t) { sess->run(); }, - args.getWarmupRuns()); - phases.run("EXECUTE", [&](const benchmark::Phase &, uint32_t) { sess->run(); }, - args.getNumRuns(), true); + phases.run( + "WARMUP", [&](const benchmark::Phase &, uint32_t) { sess->run(); }, args.getWarmupRuns()); + phases.run( + "EXECUTE", [&](const benchmark::Phase &, uint32_t) { sess->run(); }, args.getNumRuns(), true); } else { - phases.run("WARMUP", [&](const benchmark::Phase &, uint32_t) { sess->run(); }, - [&](const benchmark::Phase &phase, uint32_t nth) { - std::cout << "... " - << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms" - << std::endl; - }, - args.getWarmupRuns()); - phases.run("EXECUTE", [&](const benchmark::Phase &, uint32_t) { sess->run(); }, - [&](const benchmark::Phase &phase, uint32_t nth) { - std::cout << "... " - << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms" - << std::endl; - }, - args.getNumRuns(), true); + phases.run( + "WARMUP", [&](const benchmark::Phase &, uint32_t) { sess->run(); }, + [&](const benchmark::Phase &phase, uint32_t nth) { + std::cout << "... 
" + << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms" + << std::endl; + }, + args.getWarmupRuns()); + phases.run( + "EXECUTE", [&](const benchmark::Phase &, uint32_t) { sess->run(); }, + [&](const benchmark::Phase &phase, uint32_t nth) { + std::cout << "... " + << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms" << std::endl; + }, + args.getNumRuns(), true); } sess->teardown(); // Must be called after `interpreter->Invoke()` - tensor_dumper.addTensors(*interpreter, interpreter->outputs()); + tensor_dumper.addOutputTensors(*interpreter); std::cout << "output tensor indices = ["; - for (const auto &o : interpreter->outputs()) + auto const output_count = TfLiteInterpreterGetOutputTensorCount(interpreter); + for (int32_t idx = 0; idx < output_count; idx++) { - std::cout << o << "("; - - print_max_idx(interpreter->tensor(o)->data.f, interpreter->tensor(o)->bytes / sizeof(float)); + auto tensor = TfLiteInterpreterGetOutputTensor(interpreter, idx); + print_max_idx(reinterpret_cast<float *>(TfLiteTensorData(tensor)), + TfLiteTensorByteSize(tensor) / sizeof(float)); std::cout << "),"; } @@ -374,12 +263,13 @@ int main(const int argc, char **argv) TfLiteInterpMatchApp app(comparator); bool res = true; - for (const auto &o : interpreter->outputs()) + for (int32_t idx = 0; idx < output_count; idx++) { - auto expected = tensor_loader.get(o); - auto obtained = nnfw::tflite::TensorView<float>::make(*interpreter, o); + auto expected = tensor_loader.getOutput(idx); + auto const tensor = TfLiteInterpreterGetOutputTensor(interpreter, idx); + auto obtained = nnfw::tflite::TensorView<float>::make(tensor); - res = res && app.compareSingleTensorView(expected, obtained, o); + res = res && app.compareSingleTensorView(expected, obtained, idx); } if (!res) diff --git a/tests/tools/tflite_vanilla_run/CMakeLists.txt b/tests/tools/tflite_vanilla_run/CMakeLists.txt deleted file mode 100644 index 19e21e923..000000000 --- a/tests/tools/tflite_vanilla_run/CMakeLists.txt +++ /dev/null @@ -1,23 +0,0 @@ -if(NOT BUILD_TFLITE_VANILLA_RUN) - return() -endif() - -if(NOT BUILD_TENSORFLOW_LITE_2_3_0) - set(BUILD_TENSORFLOW_LITE_2_3_0 ON) -endif() - -nnfw_find_package(TensorFlowLite-2.3.0 REQUIRED) -nnfw_find_package(Boost REQUIRED) - -list(APPEND TFLITE_RUN_SRCS "src/tflite_vanilla_run.cc") -list(APPEND TFLITE_RUN_SRCS "src/args.cc") - -add_executable(tflite_vanilla_run ${TFLITE_RUN_SRCS}) -target_include_directories(tflite_vanilla_run PRIVATE src) -target_include_directories(tflite_vanilla_run PRIVATE ${Boost_INCLUDE_DIRS}) - -target_link_libraries(tflite_vanilla_run tensorflow-lite-2.3.0 ${LIB_PTHREAD} dl) -target_link_libraries(tflite_vanilla_run ${Boost_PROGRAM_OPTIONS_LIBRARY}) -target_link_libraries(tflite_vanilla_run nnfw_lib_benchmark nnfw_lib_misc) - -install(TARGETS tflite_vanilla_run DESTINATION bin) diff --git a/tests/tools/tflite_vanilla_run/src/args.cc b/tests/tools/tflite_vanilla_run/src/args.cc deleted file mode 100644 index dc9f250e4..000000000 --- a/tests/tools/tflite_vanilla_run/src/args.cc +++ /dev/null @@ -1,208 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "args.h" - -#include <iostream> - -namespace TFLiteVanillaRun -{ - -Args::Args(const int argc, char **argv) noexcept -{ - try - { - Initialize(); - Parse(argc, argv); - } - catch (const std::exception &e) - { - std::cerr << "error during paring args" << e.what() << '\n'; - exit(1); - } -} - -void Args::Initialize(void) -{ - try - { - // General options - po::options_description general("General options"); - - // clang-format off - general.add_options() - ("help,h", "Display available options") - ("input,i", po::value<std::string>()->default_value(""), "Input filename") - ("dump,d", po::value<std::string>()->default_value(""), "Output filename") - ("ishapes", po::value<std::vector<int>>()->multitoken(), "Input shapes") - ("compare,c", po::value<std::string>()->default_value(""), "filename to be compared with") - ("tflite", po::value<std::string>()->required()) - ("num_runs,r", po::value<int>()->default_value(1), "The number of runs") - ("warmup_runs,w", po::value<int>()->default_value(0), "The number of warmup runs") - ("run_delay,t", po::value<int>()->default_value(-1), "Delay time(ms) between runs (as default no delay") - ("gpumem_poll,g", po::value<bool>()->default_value(false), "Check gpu memory polling separately") - ("mem_poll,m", po::value<bool>()->default_value(false), "Check memory polling") - ("write_report,p", po::value<bool>()->default_value(false), "Write report") - ("validate", po::value<bool>()->default_value(true), "Validate tflite model") - ("verbose_level,v", po::value<int>()->default_value(0), "Verbose level\n" - "0: prints the only result. 
Messages btw run don't print\n" - "1: prints result and message btw run\n" - "2: prints all of messages to print\n") - ; - // clang-format on - - _options.add(general); - _positional.add("tflite", 1); - } - catch (const std::bad_cast &e) - { - std::cerr << "error by bad cast during initialization of boost::program_options" << e.what() - << '\n'; - exit(1); - } -} - -void Args::Parse(const int argc, char **argv) -{ - po::variables_map vm; - po::store(po::command_line_parser(argc, argv).options(_options).positional(_positional).run(), - vm); - - { - auto conflicting_options = [&](const std::string &o1, const std::string &o2) { - if ((vm.count(o1) && !vm[o1].defaulted()) && (vm.count(o2) && !vm[o2].defaulted())) - { - throw boost::program_options::error(std::string("Two options '") + o1 + "' and '" + o2 + - "' cannot be given at once."); - } - }; - - conflicting_options("input", "compare"); - } - - if (vm.count("help")) - { - std::cout << "tflite_run\n\n"; - std::cout << "Usage: " << argv[0] << " <.tflite> [<options>]\n\n"; - std::cout << _options; - std::cout << "\n"; - - exit(0); - } - - po::notify(vm); - - if (vm.count("dump")) - { - _dump_filename = vm["dump"].as<std::string>(); - } - - if (vm.count("compare")) - { - _compare_filename = vm["compare"].as<std::string>(); - } - - if (vm.count("input")) - { - _input_filename = vm["input"].as<std::string>(); - - if (!_input_filename.empty()) - { - if (access(_input_filename.c_str(), F_OK) == -1) - { - std::cerr << "input image file not found: " << _input_filename << "\n"; - } - } - } - - if (vm.count("ishapes")) - { - _input_shapes.resize(vm["ishapes"].as<std::vector<int>>().size()); - for (auto i = 0; i < _input_shapes.size(); i++) - { - _input_shapes[i] = vm["ishapes"].as<std::vector<int>>()[i]; - } - } - - if (vm.count("tflite")) - { - _tflite_filename = vm["tflite"].as<std::string>(); - - if (_tflite_filename.empty()) - { - // TODO Print usage instead of the below message - std::cerr << "Please specify tflite file. Run with `--help` for usage." - << "\n"; - - exit(1); - } - else - { - if (access(_tflite_filename.c_str(), F_OK) == -1) - { - std::cerr << "tflite file not found: " << _tflite_filename << "\n"; - exit(1); - } - } - } - - if (vm.count("num_runs")) - { - _num_runs = vm["num_runs"].as<int>(); - } - - if (vm.count("warmup_runs")) - { - _warmup_runs = vm["warmup_runs"].as<int>(); - } - - if (vm.count("run_delay")) - { - _run_delay = vm["run_delay"].as<int>(); - } - - if (vm.count("gpumem_poll")) - { - _gpumem_poll = vm["gpumem_poll"].as<bool>(); - } - - if (vm.count("mem_poll")) - { - _mem_poll = vm["mem_poll"].as<bool>(); - // Instead of EXECUTE to avoid overhead, memory polling runs on WARMUP - if (_mem_poll && _warmup_runs == 0) - { - _warmup_runs = 1; - } - } - - if (vm.count("write_report")) - { - _write_report = vm["write_report"].as<bool>(); - } - - if (vm.count("validate")) - { - _tflite_validate = vm["validate"].as<bool>(); - } - - if (vm.count("verbose_level")) - { - _verbose_level = vm["verbose_level"].as<int>(); - } -} - -} // end of namespace TFLiteVanillaRun diff --git a/tests/tools/tflite_vanilla_run/src/args.h b/tests/tools/tflite_vanilla_run/src/args.h deleted file mode 100644 index 3605b651c..000000000 --- a/tests/tools/tflite_vanilla_run/src/args.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __TFLITE_VANILLA_RUN_ARGS_H__ -#define __TFLITE_VANILLA_RUN_ARGS_H__ - -#include <string> -#include <boost/program_options.hpp> - -namespace po = boost::program_options; - -namespace TFLiteVanillaRun -{ - -class Args -{ -public: - Args(const int argc, char **argv) noexcept; - void print(void); - - const std::string &getTFLiteFilename(void) const { return _tflite_filename; } - const std::string &getDumpFilename(void) const { return _dump_filename; } - const std::string &getCompareFilename(void) const { return _compare_filename; } - const std::string &getInputFilename(void) const { return _input_filename; } - const std::vector<int> &getInputShapes(void) const { return _input_shapes; } - const int getNumRuns(void) const { return _num_runs; } - const int getWarmupRuns(void) const { return _warmup_runs; } - const int getRunDelay(void) const { return _run_delay; } - const bool getGpuMemoryPoll(void) const { return _gpumem_poll; } - const bool getMemoryPoll(void) const { return _mem_poll; } - const bool getWriteReport(void) const { return _write_report; } - const bool getModelValidate(void) const { return _tflite_validate; } - const int getVerboseLevel(void) const { return _verbose_level; } - -private: - void Initialize(); - void Parse(const int argc, char **argv); - -private: - po::positional_options_description _positional; - po::options_description _options; - - std::string _tflite_filename; - std::string _dump_filename; - std::string _compare_filename; - std::string _input_filename; - std::vector<int> _input_shapes; - int _num_runs; - int _warmup_runs; - int _run_delay; - bool _gpumem_poll; - bool _mem_poll; - bool _write_report; - bool _tflite_validate; - int _verbose_level; -}; - -} // end of namespace TFLiteVanillaRun - -#endif // __TFLITE_VANILLA_RUN_ARGS_H__ diff --git a/tests/tools/tflite_vanilla_run/src/tensor_view.h b/tests/tools/tflite_vanilla_run/src/tensor_view.h deleted file mode 100644 index ca04a051e..000000000 --- a/tests/tools/tflite_vanilla_run/src/tensor_view.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file TensorView.h - * @brief This file contains TensorView class - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __TFLITE_VANILLA_RUN_TENSOR_VIEW_H__ -#define __TFLITE_VANILLA_RUN_TENSOR_VIEW_H__ - -#include "tensorflow/lite/interpreter.h" - -#include "misc/tensor/Shape.h" -#include "misc/tensor/Index.h" -#include "misc/tensor/Reader.h" -#include "misc/tensor/NonIncreasingStride.h" - -namespace TFLiteVanillaRun -{ - -/** - * @brief Class to define TensorView which is inherited from nnfw::misc::tensor::Reader<T> class - */ -template <typename T> class TensorView final : public nnfw::misc::tensor::Reader<T> -{ -public: - /** - * @brief Construct a TensorView object with base and shape informations - * @param[in] shape The shape of a tensor - * @param[in] base The base address of a tensor - */ - TensorView(const nnfw::misc::tensor::Shape &shape, T *base) : _shape{shape}, _base{base} - { - // Set 'stride' - _stride.init(_shape); - } - -public: - /** - * @brief Get shape of tensor - * @return Reference of shape - */ - const nnfw::misc::tensor::Shape &shape(void) const { return _shape; } - -public: - /** - * @brief Get value of tensor index - * @param[in] index The tensor index - * @return The value at the index - */ - T at(const nnfw::misc::tensor::Index &index) const override - { - const auto offset = _stride.offset(index); - return *(_base + offset); - } - -public: - /** - * @brief Get reference value of tensor index - * @param[in] index The tensor index - * @return The reference value at the index - */ - T &at(const nnfw::misc::tensor::Index &index) - { - const auto offset = _stride.offset(index); - return *(_base + offset); - } - -private: - nnfw::misc::tensor::Shape _shape; /**< The tensor shape */ - -public: - T *_base; /**< The base address of tensor */ - nnfw::misc::tensor::NonIncreasingStride _stride; /**< The NonIncreasingStride object */ - -public: - // TODO Introduce Operand ID class - /** - * @brief Create TensorView object using given parameters - * @param[in] interp The TfLite interpreter - * @param[in] tensor_index The tensor index - * @return The new TensorView<T> object - */ - static TensorView<T> make(::tflite::Interpreter &interp, int tensor_index) - { - auto tensor_ptr = interp.tensor(tensor_index); - - // Set 'shape' - nnfw::misc::tensor::Shape shape(tensor_ptr->dims->size); - - for (uint32_t axis = 0; axis < shape.rank(); ++axis) - { - shape.dim(axis) = tensor_ptr->dims->data[axis]; - } - - return TensorView<T>(shape, interp.typed_tensor<T>(tensor_index)); - } -}; - -} // namespace TFLiteVanillaRun - -#endif // __TFLITE_VANILLA_RUN_TENSOR_VIEW_H__ diff --git a/tests/tools/tflite_vanilla_run/src/tflite_vanilla_run.cc b/tests/tools/tflite_vanilla_run/src/tflite_vanilla_run.cc deleted file mode 100644 index d44ea60cf..000000000 --- a/tests/tools/tflite_vanilla_run/src/tflite_vanilla_run.cc +++ /dev/null @@ -1,276 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/kernels/register.h" - -#include "args.h" -#include "tensor_view.h" -#include "misc/EnvVar.h" -#include "misc/RandomGenerator.h" -#include "misc/tensor/IndexIterator.h" -#include "misc/tensor/Object.h" -#include "benchmark.h" - -#include <iostream> -#include <chrono> -#include <algorithm> -#include <vector> -#include <memory> - -using namespace std::placeholders; // for _1, _2 ... - -#define TFLITE_ENSURE(exp) \ - { \ - const TfLiteStatus status = (exp); \ - \ - if (status != kTfLiteOk) \ - { \ - std::ostringstream ss; \ - ss << #exp << " failed (" << __FILE__ << ":" << __LINE__ << ")"; \ - throw std::runtime_error{ss.str()}; \ - } \ - } - -namespace -{ - -void print_max_idx(float *f, int size) -{ - float *p = std::max_element(f, f + size); - std::cout << "max:" << p - f; -} - -static const char *default_backend_cand = "tflite_cpu"; - -// Verifies whether the model is a flatbuffer file. -class BMFlatBufferVerifier : public tflite::TfLiteVerifier -{ -public: - bool Verify(const char *data, int length, tflite::ErrorReporter *reporter) override - { - - flatbuffers::Verifier verifier(reinterpret_cast<const uint8_t *>(data), length); - if (!tflite::VerifyModelBuffer(verifier)) - { - reporter->Report("The model is not a valid Flatbuffer file"); - return false; - } - return true; - } -}; - -} // namespace anonymous - -int main(const int argc, char **argv) -{ - tflite::StderrReporter error_reporter; - - TFLiteVanillaRun::Args args(argc, argv); - - std::chrono::milliseconds t_model_load(0), t_prepare(0); - - // TODO Apply verbose level to phases - const int verbose = args.getVerboseLevel(); - benchmark::Phases phases( - benchmark::PhaseOption{args.getMemoryPoll(), args.getGpuMemoryPoll(), args.getRunDelay()}); - - std::unique_ptr<tflite::FlatBufferModel> model; - std::unique_ptr<tflite::Interpreter> interpreter; - std::unique_ptr<tflite::TfLiteVerifier> verifier{new BMFlatBufferVerifier}; - - try - { - phases.run("MODEL_LOAD", [&](const benchmark::Phase &, uint32_t) { - if (args.getModelValidate()) - { - model = tflite::FlatBufferModel::VerifyAndBuildFromFile(args.getTFLiteFilename().c_str(), - verifier.get(), &error_reporter); - } - else - { - model = tflite::FlatBufferModel::BuildFromFile(args.getTFLiteFilename().c_str(), - &error_reporter); - } - if (model == nullptr) - { - throw std::runtime_error{"Cannot create model"}; - } - - // Use tflite's resolver, not onert's one - tflite::ops::builtin::BuiltinOpResolver resolver; - tflite::InterpreterBuilder builder(*model, resolver); - TFLITE_ENSURE(builder(&interpreter)) - interpreter->SetNumThreads(nnfw::misc::EnvVar("THREAD").asInt(-1)); - }); - } - catch (const std::exception &e) - { - std::cerr << e.what() << '\n'; - return 1; - } - - const bool use_nnapi = nnfw::misc::EnvVar("USE_NNAPI").asBool(false); - - try - { - phases.run("PREPARE", [&](const benchmark::Phase &, uint32_t) { - interpreter->UseNNAPI(use_nnapi); - interpreter->AllocateTensors(); - }); - } - catch (const std::exception &e) - { - std::cerr << e.what() << '\n'; - return 1; - } - - const int seed = 1; /* TODO Add an option for seed value */ - nnfw::misc::RandomGenerator randgen{seed, 0.0f, 2.0f}; - - // No input specified. So we fill the input tensors with random values. 
- for (const auto &o : interpreter->inputs()) - { - TfLiteTensor *tensor = interpreter->tensor(o); - if (tensor->type == kTfLiteInt32) - { - // Generate singed 32-bit integer (s32) input - auto tensor_view = TFLiteVanillaRun::TensorView<int32_t>::make(*interpreter, o); - - int32_t value = 0; - - nnfw::misc::tensor::iterate(tensor_view.shape()) - << [&](const nnfw::misc::tensor::Index &ind) { - // TODO Generate random values - // Gather operation: index should be within input coverage. - tensor_view.at(ind) = value; - value++; - }; - } - else if (tensor->type == kTfLiteUInt8) - { - // Generate unsigned 8-bit integer input - auto tensor_view = TFLiteVanillaRun::TensorView<uint8_t>::make(*interpreter, o); - - uint8_t value = 0; - - nnfw::misc::tensor::iterate(tensor_view.shape()) - << [&](const nnfw::misc::tensor::Index &ind) { - // TODO Generate random values - tensor_view.at(ind) = value; - value = (value + 1) & 0xFF; - }; - } - else if (tensor->type == kTfLiteBool) - { - // Generate bool input - auto tensor_view = TFLiteVanillaRun::TensorView<bool>::make(*interpreter, o); - - auto fp = static_cast<bool (nnfw::misc::RandomGenerator::*)( - const ::nnfw::misc::tensor::Shape &, const ::nnfw::misc::tensor::Index &)>( - &nnfw::misc::RandomGenerator::generate<bool>); - const nnfw::misc::tensor::Object<bool> data(tensor_view.shape(), - std::bind(fp, randgen, _1, _2)); - - nnfw::misc::tensor::iterate(tensor_view.shape()) - << [&](const nnfw::misc::tensor::Index &ind) { - const auto value = data.at(ind); - tensor_view.at(ind) = value; - }; - } - else - { - assert(tensor->type == kTfLiteFloat32); - - const float *end = reinterpret_cast<const float *>(tensor->data.raw_const + tensor->bytes); - for (float *ptr = tensor->data.f; ptr < end; ptr++) - { - *ptr = randgen.generate<float>(); - } - } - } - - std::cout << "input tensor indices = ["; - for (const auto &o : interpreter->inputs()) - { - std::cout << o << ","; - } - std::cout << "]" << std::endl; - - // NOTE: Measuring memory can't avoid taking overhead. Therefore, memory will be measured on the - // only warmup. - if (verbose == 0) - { - phases.run("WARMUP", [&](const benchmark::Phase &, uint32_t) { interpreter->Invoke(); }, - args.getWarmupRuns()); - phases.run("EXECUTE", [&](const benchmark::Phase &, uint32_t) { interpreter->Invoke(); }, - args.getNumRuns(), true); - } - else - { - phases.run("WARMUP", [&](const benchmark::Phase &, uint32_t) { interpreter->Invoke(); }, - [&](const benchmark::Phase &phase, uint32_t nth) { - std::cout << "... " - << "warmup " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms" - << std::endl; - }, - args.getWarmupRuns()); - phases.run("EXECUTE", [&](const benchmark::Phase &, uint32_t) { interpreter->Invoke(); }, - [&](const benchmark::Phase &phase, uint32_t nth) { - std::cout << "... 
" - << "run " << nth + 1 << " takes " << phase.time[nth] / 1e3 << " ms" - << std::endl; - }, - args.getNumRuns(), true); - } - - std::cout << "output tensor indices = ["; - for (const auto &o : interpreter->outputs()) - { - std::cout << o << "("; - - print_max_idx(interpreter->tensor(o)->data.f, interpreter->tensor(o)->bytes / sizeof(float)); - - std::cout << "),"; - } - std::cout << "]" << std::endl; - - // TODO Apply verbose level to result - - // prepare result - benchmark::Result result(phases); - - // to stdout - benchmark::printResult(result); - - if (args.getWriteReport()) - { - // prepare csv task - std::string exec_basename; - std::string model_basename; - std::string backend_name = default_backend_cand; - { - std::vector<char> vpath(args.getTFLiteFilename().begin(), args.getTFLiteFilename().end() + 1); - model_basename = basename(vpath.data()); - size_t lastindex = model_basename.find_last_of("."); - model_basename = model_basename.substr(0, lastindex); - exec_basename = basename(argv[0]); - } - benchmark::writeResult(result, exec_basename, model_basename, backend_name); - } - - return 0; -} |