15 files changed, 2794 insertions, 0 deletions
diff --git a/compiler/circle-eval-diff/CMakeLists.txt b/compiler/circle-eval-diff/CMakeLists.txt
new file mode 100644
index 000000000..d5a62301c
--- /dev/null
+++ b/compiler/circle-eval-diff/CMakeLists.txt
@@ -0,0 +1,42 @@
+set(DRIVER "driver/Driver.cpp")
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_executable(circle-eval-diff ${DRIVER} ${SOURCES})
+target_include_directories(circle-eval-diff PRIVATE include)
+target_include_directories(circle-eval-diff PRIVATE src)
+
+target_link_libraries(circle-eval-diff arser)
+target_link_libraries(circle-eval-diff safemain)
+target_link_libraries(circle-eval-diff foder)
+target_link_libraries(circle-eval-diff loco)
+target_link_libraries(circle-eval-diff luci_import)
+target_link_libraries(circle-eval-diff luci_lang)
+target_link_libraries(circle-eval-diff luci_interpreter)
+target_link_libraries(circle-eval-diff dio_hdf5)
+target_link_libraries(circle-eval-diff vconone)
+
+install(TARGETS circle-eval-diff DESTINATION bin)
+
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
+# circle-eval-diff is executable, so we do not link it to the test.
+# Instead, we use TEST_SOURCES to specify sources uesd for tests.
+set(TEST_SOURCES
+    "src/MetricPrinter.cpp"
+    "src/Tensor.cpp"
+    "src/InputDataLoader.cpp")
+
+nnas_find_package(GTest REQUIRED)
+GTest_AddTest(circle_eval_diff_test ${TESTS} ${TEST_SOURCES})
+target_include_directories(circle_eval_diff_test PRIVATE include)
+target_include_directories(circle_eval_diff_test PRIVATE src)
+target_link_libraries(circle_eval_diff_test luci_testhelper)
+target_link_libraries(circle_eval_diff_test nncc_coverage)
+target_link_libraries(circle_eval_diff_test dio_hdf5)
+target_link_libraries(circle_eval_diff_test loco)
+target_link_libraries(circle_eval_diff_test luci_lang)
diff --git a/compiler/circle-eval-diff/README.md b/compiler/circle-eval-diff/README.md
new file mode 100644
index 000000000..a3727cc6d
--- /dev/null
+++ b/compiler/circle-eval-diff/README.md
@@ -0,0 +1,51 @@
+# circle-eval-diff
+
+_circle-eval-diff_ compares inference results of two circle models.
+
+## Use cases
+
+1. _circle-eval-diff_ can be used to evaluate reconstruction errors of quantized models.
+2. _circle-eval-diff_ can be used to verify optimization (or any kind of value-preserving conversion) is safe.
+
+## Usage
+
+Run circle-eval-diff with the following arguments.
+
+--first_input_model: first model to compare (.circle).
+
+--second_input_model: second model to compare (.circle).
+
+--first_input_data: input data for the first model (.h5, directory). Random data will be used if this argument is not given.
+
+--second_input_data: input data for the second model (.h5, directory). Random data will be used if this argument is not given.
+
+--input_data_format: input data format (h5 (default), directory).
+
+--metric: metric to compare inference results (MAE (default), etc).
+
+```
+$ ./circle-eval-diff
+  --first_input_model <first_input_model>
+  --second_input_model <second_input_model>
+  --first_input_data <first_input_data>
+  --second_input_data <second_input_data>
+  --input_data_format <data_format>
+  --metric <metric>
+```
+
+For example,
+```
+$ ./circle-eval-diff
+  --first_input_model A.circle
+  --second_input_model B.circle
+  --first_input_data A.h5
+  --second_input_data B.h5
+  --input_data_format h5
+  --metric MAE
+```
+
+It will print MAE (Mean Absolute Error) between the inference result of A.circle with A.h5 and that of B.circle with B.h5.
+
+## Note
+
+Circle models are executed by _luci-interpreter_.
diff --git a/compiler/circle-eval-diff/driver/Driver.cpp b/compiler/circle-eval-diff/driver/Driver.cpp
new file mode 100644
index 000000000..7e63ec88c
--- /dev/null
+++ b/compiler/circle-eval-diff/driver/Driver.cpp
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleEvalDiff.h"
+
+#include <arser/arser.h>
+#include <vconone/vconone.h>
+
+using namespace circle_eval_diff;
+
+namespace
+{
+
+std::string to_lower_case(std::string s)
+{
+  std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) { return std::tolower(c); });
+  return s;
+}
+
+InputFormat to_input_format(const std::string &str)
+{
+  auto small_str = to_lower_case(str);
+  if (small_str.compare("h5") == 0)
+    return InputFormat::H5;
+
+  if (small_str.compare("directory") == 0 || small_str.compare("dir") == 0)
+    return InputFormat::DIR;
+
+  throw std::runtime_error("Unsupported input format.");
+}
+
+void print_version(void)
+{
+  std::cout << "circle-eval-diff version " << vconone::get_string() << std::endl;
+  std::cout << vconone::get_copyright() << std::endl;
+}
+
+} // namespace
+
+int entry(const int argc, char **argv)
+{
+  arser::Arser arser("Compare inference results of two circle models");
+
+  arser::Helper::add_version(arser, print_version);
+
+  arser.add_argument("--first_model").required(true).help("First input model filepath");
+
+  arser.add_argument("--second_model").required(true).help("Second input model filepath");
+
+  arser.add_argument("--first_input_data")
+    .help("Input data filepath for the first model. If not given, circle-eval-diff will run with "
+          "randomly generated data");
+
+  arser.add_argument("--second_input_data")
+    .help("Input data filepath for the second model. If not given, circle-eval-diff will run with "
+          "randomly generated data");
+
+  arser.add_argument("--dump_output_with_prefix")
+    .help("Dump output to files. <prefix> should be given as an argument. "
+          "Outputs are saved in <prefix>.<data_index>.first.output<output_index> and "
+          "<prefix>.<data_index>.second.output<output_index>.");
+
+  arser.add_argument("--print_mae").nargs(0).default_value(false).help("Print Mean Absolute Error");
+
+  arser.add_argument("--print_mape")
+    .nargs(0)
+    .default_value(false)
+    .help("Print Mean Absolute PercentageError");
+
+  arser.add_argument("--print_mpeir")
+    .nargs(0)
+    .default_value(false)
+    .help("Print Mean Peak Error to Interval Ratio");
+
+  arser.add_argument("--print_top1_match")
+    .nargs(0)
+    .default_value(false)
+    .help("Print Mean Top-1 Match Ratio");
+
+  arser.add_argument("--print_top5_match")
+    .nargs(0)
+    .default_value(false)
+    .help("Print Mean Top-5 Match Ratio");
+
+  arser.add_argument("--print_mse").nargs(0).default_value(false).help("Print Mean Squared Error");
+
+  arser.add_argument("--input_data_format")
+    .default_value("h5")
+    .help("Input data format. h5/hdf5 (default) or directory");
+
+  try
+  {
+    arser.parse(argc, argv);
+  }
+  catch (const std::runtime_error &err)
+  {
+    std::cout << err.what() << std::endl;
+    std::cout << arser;
+    return 255;
+  }
+
+  const auto first_model_path = arser.get<std::string>("--first_model");
+  const auto second_model_path = arser.get<std::string>("--second_model");
+
+  // Default values
+  std::string first_input_data_path;
+  std::string second_input_data_path;
+  std::string metric;
+  std::string input_data_format;
+  std::string output_prefix;
+
+  if (arser["--first_input_data"])
+    first_input_data_path = arser.get<std::string>("--first_input_data");
+
+  if (arser["--second_input_data"])
+    second_input_data_path = arser.get<std::string>("--second_input_data");
+
+  if (arser["--first_input_data"] != arser["--second_input_data"])
+    throw std::runtime_error("Input data path should be given for both first_model and "
+                             "second_model, or neither must be given.");
+
+  if (arser["--dump_output_with_prefix"])
+    output_prefix = arser.get<std::string>("--dump_output_with_prefix");
+
+  // Set Metrics
+  std::vector<Metric> metrics;
+  if (arser["--print_mae"] and arser.get<bool>("--print_mae"))
+  {
+    metrics.emplace_back(Metric::MAE);
+  }
+  if (arser["--print_mape"] and arser.get<bool>("--print_mape"))
+  {
+    metrics.emplace_back(Metric::MAPE);
+  }
+  if (arser["--print_mpeir"] and arser.get<bool>("--print_mpeir"))
+  {
+    metrics.emplace_back(Metric::MPEIR);
+  }
+  if (arser["--print_top1_match"] and arser.get<bool>("--print_top1_match"))
+  {
+    metrics.emplace_back(Metric::MTOP1);
+  }
+  if (arser["--print_top5_match"] and arser.get<bool>("--print_top5_match"))
+  {
+    metrics.emplace_back(Metric::MTOP5);
+  }
+  if (arser["--print_mse"] and arser.get<bool>("--print_mse"))
+  {
+    metrics.emplace_back(Metric::MSE);
+  }
+
+  input_data_format = arser.get<std::string>("--input_data_format");
+
+  auto ctx = std::make_unique<CircleEvalDiff::Context>();
+  {
+    ctx->first_model_path = first_model_path;
+    ctx->second_model_path = second_model_path;
+    ctx->first_input_data_path = first_input_data_path;
+    ctx->second_input_data_path = second_input_data_path;
+    ctx->metric = metrics;
+    ctx->input_format = to_input_format(input_data_format);
+    ctx->output_prefix = output_prefix;
+  }
+
+  CircleEvalDiff ced(std::move(ctx));
+
+  ced.init();
+
+  ced.evalDiff();
+
+  return EXIT_SUCCESS;
+}
diff --git a/compiler/circle-eval-diff/include/CircleEvalDiff.h b/compiler/circle-eval-diff/include/CircleEvalDiff.h
new file mode 100644
index 000000000..7894480ac
--- /dev/null
+++ b/compiler/circle-eval-diff/include/CircleEvalDiff.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_EVAL_DIFF_H__
+#define __CIRCLE_EVAL_DIFF_H__
+
+#include <luci/IR/Module.h>
+#include <luci_interpreter/Interpreter.h>
+
+#include "InputDataLoader.h"
+#include "MetricPrinter.h"
+
+#include <string>
+#include <memory>
+#include <vector>
+
+namespace circle_eval_diff
+{
+
+// Forward declaration
+class ModuleEvalDiff;
+
+enum class Metric
+{
+  Undefined, // For debugging
+  MAE,       // Mean Absolute Error
+  MAPE,      // Mean Percentage Absolute Error
+  MPEIR,     // Mean Peak Error to Interval Ratio
+  MTOP1,     // Mean Top-1 Match Ratio
+  MTOP5,     // Mean Top-5 Match Ratio
+  MSE,       // Mean Squared Error
+};
+
+class CircleEvalDiff final
+{
+public:
+  struct Context
+  {
+    std::string first_model_path;
+    std::string second_model_path;
+    std::string first_input_data_path;
+    std::string second_input_data_path;
+    std::vector<Metric> metric;
+    InputFormat input_format = InputFormat::Undefined;
+    std::string output_prefix;
+  };
+
+public:
+  CircleEvalDiff(std::unique_ptr<Context> &&ctx);
+
+  ~CircleEvalDiff();
+
+  void init();
+
+  // Evaluate two circle models for the given input data and compare the results
+  void evalDiff(void) const;
+
+private:
+  std::unique_ptr<Context> _ctx;
+  std::unique_ptr<luci::Module> _first_module;
+  std::unique_ptr<luci::Module> _second_module;
+  std::vector<std::unique_ptr<MetricPrinter>> _metrics;
+};
+
+} // namespace circle_eval_diff
+
+#endif // __CIRCLE_EVAL_DIFF_H__
diff --git a/compiler/circle-eval-diff/requires.cmake b/compiler/circle-eval-diff/requires.cmake
new file mode 100644
index 000000000..cae9b7c62
--- /dev/null
+++ b/compiler/circle-eval-diff/requires.cmake
@@ -0,0 +1,7 @@
+require("loco")
+require("luci")
+require("luci-interpreter")
+require("dio-hdf5")
+require("safemain")
+require("arser")
+require("vconone")
diff --git a/compiler/circle-eval-diff/src/CircleEvalDiff.cpp b/compiler/circle-eval-diff/src/CircleEvalDiff.cpp
new file mode 100644
index 000000000..43e026bf6
--- /dev/null
+++ b/compiler/circle-eval-diff/src/CircleEvalDiff.cpp
@@ -0,0 +1,264 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleEvalDiff.h"
+#include "InputDataLoader.h"
+#include "MetricPrinter.h"
+#include "Tensor.h"
+
+#include <foder/FileLoader.h>
+#include <luci/Importer.h>
+
+#include <stdexcept>
+
+namespace
+{
+
+bool same_shape(const luci::CircleNode *a, const luci::CircleNode *b)
+{
+  if (a->rank() != b->rank())
+    return false;
+
+  for (uint32_t i = 0; i < a->rank(); i++)
+  {
+    if (not(a->dim(i) == b->dim(i)))
+      return false;
+  }
+
+  return true;
+}
+
+bool same_dtype(const luci::CircleNode *a, const luci::CircleNode *b)
+{
+  return a->dtype() == b->dtype();
+}
+
+std::unique_ptr<luci::Module> import(const std::string &model_path)
+{
+  // Load model from the file
+  foder::FileLoader loader{model_path};
+  std::vector<char> model_data = loader.load();
+
+  // Verify flatbuffers
+  flatbuffers::Verifier verifier{reinterpret_cast<const uint8_t *>(model_data.data()),
+                                 model_data.size()};
+  if (not circle::VerifyModelBuffer(verifier))
+  {
+    throw std::runtime_error("Failed to verify circle '" + model_path + "'");
+  }
+
+  auto circle_model = circle::GetModel(model_data.data());
+
+  if (not circle_model)
+    throw std::runtime_error("Failed to load '" + model_path + "'");
+
+  auto module = luci::Importer().importModule(circle_model);
+
+  if (not module)
+    throw std::runtime_error("Failed to load '" + model_path + "'");
+
+  return module;
+}
+
+const std::vector<loco::Node *> inputs_of(const luci::Module *module)
+{
+  return loco::input_nodes(module->graph());
+}
+
+const std::vector<loco::Node *> outputs_of(const luci::Module *module)
+{
+  return loco::output_nodes(module->graph());
+}
+
+void writeDataToFile(const std::string &filename, const char *data, size_t data_size)
+{
+  std::ofstream fs(filename, std::ofstream::binary);
+  if (fs.fail())
+    throw std::runtime_error("Cannot open file \"" + filename + "\".\n");
+  if (fs.write(data, data_size).fail())
+  {
+    throw std::runtime_error("Failed to write data to file \"" + filename + "\".\n");
+  }
+}
+
+void checkOutputs(const luci::Module *first, const luci::Module *second)
+{
+  const auto first_output = outputs_of(first);
+  const auto second_output = outputs_of(second);
+
+  if (first_output.size() != second_output.size())
+    throw std::runtime_error("Models have different output counts");
+
+  for (uint32_t i = 0; i < first_output.size(); i++)
+  {
+    const auto first_node = loco::must_cast<luci::CircleNode *>(first_output[i]);
+    const auto second_node = loco::must_cast<luci::CircleNode *>(second_output[i]);
+
+    if (not same_shape(first_node, second_node))
+      throw std::runtime_error("Output shape mismatch (" + first_node->name() + ", " +
+                               second_node->name() + ")");
+
+    if (not same_dtype(first_node, second_node))
+      throw std::runtime_error("Output dtype mismatch (" + first_node->name() + ", " +
+                               second_node->name() + ")");
+  }
+}
+
+} // namespace
+
+namespace circle_eval_diff
+{
+
+std::vector<std::shared_ptr<Tensor>> interpret(const luci::Module *module,
+                                               const InputDataLoader::Data &data)
+{
+  auto interpreter = std::make_unique<luci_interpreter::Interpreter>(module);
+
+  auto input_nodes = ::inputs_of(module);
+  auto output_nodes = ::outputs_of(module);
+
+  for (uint32_t input_idx = 0; input_idx < data.size(); input_idx++)
+  {
+    auto input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
+    assert(input_node->index() == input_idx);
+
+    auto input_data = data.at(input_idx);
+    interpreter->writeInputTensor(input_node, input_data.buffer(), input_data.byte_size());
+  }
+
+  interpreter->interpret();
+
+  std::vector<std::shared_ptr<Tensor>> outputs;
+  for (uint32_t output_idx = 0; output_idx < output_nodes.size(); output_idx++)
+  {
+    auto output_node = loco::must_cast<const luci::CircleOutput *>(output_nodes[output_idx]);
+    assert(output_node->index() == output_idx);
+
+    auto tensor = createEmptyTensor(output_node);
+    interpreter->readOutputTensor(output_node, tensor->buffer(), tensor->byte_size());
+    outputs.emplace_back(tensor);
+  }
+
+  return outputs;
+}
+
+CircleEvalDiff::CircleEvalDiff(std::unique_ptr<Context> &&ctx) : _ctx(std::move(ctx))
+{
+  // DO NOTHING
+}
+
+CircleEvalDiff::~CircleEvalDiff() = default;
+
+void CircleEvalDiff::init()
+{
+  _first_module = import(_ctx->first_model_path);
+  _second_module = import(_ctx->second_model_path);
+
+  // Check modules have the same output signature (dtype/shape)
+  // Exception will be thrown if they have different signature
+  checkOutputs(_first_module.get(), _second_module.get());
+
+  // Set metric
+  std::unique_ptr<MetricPrinter> metric;
+  for (auto metric : _ctx->metric)
+  {
+    switch (metric)
+    {
+      case Metric::MAE:
+      {
+        _metrics.emplace_back(std::make_unique<MAEPrinter>());
+        break;
+      }
+      case Metric::MAPE:
+      {
+        _metrics.emplace_back(std::make_unique<MAPEPrinter>());
+        break;
+      }
+      case Metric::MPEIR:
+      {
+        _metrics.emplace_back(std::make_unique<MPEIRPrinter>());
+        break;
+      }
+      case Metric::MTOP1:
+      {
+        _metrics.emplace_back(std::make_unique<TopKMatchPrinter>(1));
+        break;
+      }
+      case Metric::MTOP5:
+      {
+        _metrics.emplace_back(std::make_unique<TopKMatchPrinter>(5));
+        break;
+      }
+      case Metric::MSE:
+      {
+        _metrics.emplace_back(std::make_unique<MSEPrinter>());
+        break;
+      }
+      default:
+        throw std::runtime_error("Unsupported metric.");
+    }
+    _metrics.back()->init(_first_module.get(), _second_module.get());
+  }
+}
+
+void CircleEvalDiff::evalDiff(void) const
+{
+  auto first_input_loader = circle_eval_diff::makeDataLoader(
+    _ctx->first_input_data_path, _ctx->input_format, ::inputs_of(_first_module.get()));
+  auto second_input_loader = circle_eval_diff::makeDataLoader(
+    _ctx->second_input_data_path, _ctx->input_format, ::inputs_of(_second_module.get()));
+
+  for (uint32_t data_idx = 0; data_idx < first_input_loader->size(); data_idx++)
+  {
+    std::cout << "Evaluating " << data_idx << "'th data" << std::endl;
+
+    auto first_data = first_input_loader->get(data_idx);
+    auto second_data = second_input_loader->get(data_idx);
+
+    auto first_output = interpret(_first_module.get(), first_data);
+    auto second_output = interpret(_second_module.get(), second_data);
+
+    for (auto &metric : _metrics)
+    {
+      metric->accumulate(first_output, second_output);
+    }
+
+    if (_ctx.get()->output_prefix.empty())
+      continue;
+
+    for (uint32_t i = 0; i < first_output.size(); i++)
+    {
+      auto out = first_output[i];
+      writeDataToFile(_ctx.get()->output_prefix + "." + std::to_string(data_idx) + ".first.output" +
+                        std::to_string(i),
+                      (char *)(out->buffer()), out->byte_size());
+    }
+    for (uint32_t i = 0; i < second_output.size(); i++)
+    {
+      auto out = second_output[i];
+      writeDataToFile(_ctx.get()->output_prefix + "." + std::to_string(data_idx) +
+                        ".second.output" + std::to_string(i),
+                      (char *)(out->buffer()), out->byte_size());
+    }
+  }
+
+  for (auto &metric : _metrics)
+  {
+    std::cout << metric.get() << std::endl;
+  }
+}
+
+} // namespace circle_eval_diff
diff --git a/compiler/circle-eval-diff/src/InputDataLoader.cpp b/compiler/circle-eval-diff/src/InputDataLoader.cpp
new file mode 100644
index 000000000..7b491a37a
--- /dev/null
+++ b/compiler/circle-eval-diff/src/InputDataLoader.cpp
@@ -0,0 +1,238 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "InputDataLoader.h"
+
+#include <dio_hdf5/HDF5Importer.h>
+#include <loco/IR/Graph.h>
+#include <luci/IR/CircleNodes.h>
+
+#include <cstring>
+#include <dirent.h>
+#include <fstream>
+#include <vector>
+
+using DataType = loco::DataType;
+using Shape = std::vector<loco::Dimension>;
+
+namespace circle_eval_diff
+{
+
+// Check the type and the shape of CircleInput
+void verifyTypeShape(const luci::CircleInput *input_node, const DataType &dtype, const Shape &shape)
+{
+  // Type check
+  if (dtype != input_node->dtype())
+    throw std::runtime_error("Wrong input type.");
+
+  if (shape.size() != input_node->rank())
+    throw std::runtime_error("Input rank mismatch.");
+
+  for (uint32_t i = 0; i < shape.size(); i++)
+  {
+    if (not(shape.at(i) == input_node->dim(i)))
+      throw std::runtime_error("Input shape mismatch.");
+  }
+}
+
+std::vector<size_t> getEachByteSizeOf(const std::vector<loco::Node *> &nodes)
+{
+  std::vector<size_t> vec;
+
+  for (const auto node : nodes)
+  {
+    const auto input_node = loco::must_cast<const luci::CircleInput *>(node);
+    const auto dtype_size = loco::size(input_node->dtype());
+    size_t element_size = 1;
+
+    for (uint32_t index = 0; index < input_node->rank(); index++)
+    {
+      element_size *= input_node->dim(index).value();
+    }
+
+    vec.push_back(element_size * dtype_size);
+  }
+
+  return vec;
+}
+
+size_t getTotalByteSizeOf(const std::vector<loco::Node *> &nodes)
+{
+  size_t total_byte_size = 0;
+
+  for (const auto node : nodes)
+  {
+    const auto input_node = loco::must_cast<const luci::CircleInput *>(node);
+    size_t byte_size = loco::size(input_node->dtype());
+
+    for (uint32_t index = 0; index < input_node->rank(); index++)
+    {
+      byte_size *= input_node->dim(index).value();
+    }
+
+    total_byte_size += byte_size;
+  }
+
+  return total_byte_size;
+}
+
+} // namespace circle_eval_diff
+
+namespace circle_eval_diff
+{
+
+HDF5Loader::HDF5Loader(const std::string &file_path, const std::vector<loco::Node *> &input_nodes)
+  : _input_nodes{input_nodes}
+{
+  try
+  {
+    using HDF5Importer = dio::hdf5::HDF5Importer;
+
+    _hdf5 = std::make_unique<HDF5Importer>(file_path);
+    _hdf5->importGroup("value");
+  }
+  catch (const H5::Exception &e)
+  {
+    H5::Exception::printErrorStack();
+    throw std::runtime_error("HDF5 error occurred.");
+  }
+}
+
+uint32_t HDF5Loader::size(void) const { return _hdf5->numData(); }
+
+InputDataLoader::Data HDF5Loader::get(uint32_t data_idx) const
+{
+  Data data;
+  data.resize(_input_nodes.size());
+
+  for (uint32_t input_idx = 0; input_idx < _input_nodes.size(); input_idx++)
+  {
+    auto input_node = loco::must_cast<luci::CircleInput *>(_input_nodes.at(input_idx));
+    assert(input_node->index() == input_idx);
+
+    data.at(input_idx) = *createEmptyTensor(input_node).get();
+
+    auto input_buffer = data.at(input_idx).buffer();
+    const auto input_buffer_bytes = data.at(input_idx).byte_size();
+
+    try
+    {
+      if (_hdf5->isRawData())
+      {
+        _hdf5->readTensor(data_idx, input_idx, input_buffer, input_buffer_bytes);
+      }
+      else
+      {
+        DataType dtype;
+        Shape shape;
+        _hdf5->readTensor(data_idx, input_idx, &dtype, &shape, input_buffer, input_buffer_bytes);
+
+        // Check the type and the shape of the input data is valid
+        verifyTypeShape(input_node, dtype, shape);
+      }
+    }
+    catch (const H5::Exception &e)
+    {
+      H5::Exception::printErrorStack();
+      throw std::runtime_error("HDF5 error occurred.");
+    }
+  }
+
+  return data;
+}
+
+DirectoryLoader::DirectoryLoader(const std::string &dir_path,
+                                 const std::vector<loco::Node *> &input_nodes)
+  : _input_nodes{input_nodes}
+{
+  DIR *dir = opendir(dir_path.c_str());
+  if (not dir)
+  {
+    throw std::runtime_error("Cannot open directory \"" + dir_path + "\".");
+  }
+
+  struct dirent *entry = nullptr;
+  const auto input_total_bytes = getTotalByteSizeOf(input_nodes);
+  while ((entry = readdir(dir)))
+  {
+    // Skip if the entry is not a regular file
+    if (entry->d_type != DT_REG)
+      continue;
+
+    _data_paths.push_back(dir_path + "/" + entry->d_name);
+  }
+
+  closedir(dir);
+}
+
+uint32_t DirectoryLoader::size(void) const { return _data_paths.size(); }
+
+InputDataLoader::Data DirectoryLoader::get(uint32_t data_idx) const
+{
+  // Read raw data
+  const auto input_total_bytes = getTotalByteSizeOf(_input_nodes);
+  std::vector<char> input_data(input_total_bytes);
+  const auto raw_data_path = _data_paths.at(data_idx);
+  std::ifstream fs(raw_data_path, std::ifstream::binary);
+
+  if (fs.fail())
+  {
+    throw std::runtime_error("Cannot open file \"" + raw_data_path + "\".");
+  }
+  if (fs.read(input_data.data(), input_total_bytes).fail())
+  {
+    throw std::runtime_error("Failed to read raw data from file \"" + raw_data_path + "\".");
+  }
+
+  // Make Tensor from raw data
+  auto input_data_cur = input_data.data();
+
+  Data data;
+  data.resize(_input_nodes.size());
+  std::vector<size_t> input_bytes = getEachByteSizeOf(_input_nodes);
+  for (uint32_t index = 0; index < _input_nodes.size(); index++)
+  {
+    const auto input_node = loco::must_cast<const luci::CircleInput *>(_input_nodes.at(index));
+    auto &tensor = data.at(index);
+    tensor = *createEmptyTensor(input_node).get();
+    auto buffer = tensor.buffer();
+    std::memcpy(buffer, input_data_cur, input_bytes.at(index));
+    input_data_cur += input_bytes.at(index);
+  }
+
+  return data;
+}
+
+std::unique_ptr<InputDataLoader> makeDataLoader(const std::string &file_path,
+                                                const InputFormat &format,
+                                                const std::vector<loco::Node *> &input_nodes)
+{
+  switch (format)
+  {
+    case InputFormat::H5:
+    {
+      return std::make_unique<HDF5Loader>(file_path, input_nodes);
+    }
+    case InputFormat::DIR:
+    {
+      return std::make_unique<DirectoryLoader>(file_path, input_nodes);
+    }
+    default:
+      throw std::runtime_error{"Unsupported input format."};
+  }
+}
+
+} // namespace circle_eval_diff
diff --git a/compiler/circle-eval-diff/src/InputDataLoader.h b/compiler/circle-eval-diff/src/InputDataLoader.h
new file mode 100644
index 000000000..14921b239
--- /dev/null
+++ b/compiler/circle-eval-diff/src/InputDataLoader.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_EVAL_DIFF_INPUT_DATA_LOADER_H__
+#define __CIRCLE_EVAL_DIFF_INPUT_DATA_LOADER_H__
+
+#include <dio_hdf5/HDF5Importer.h>
+#include <loco/IR/Node.h>
+#include <luci/IR/CircleNodes.h>
+
+#include "Tensor.h"
+
+#include <memory>
+#include <string>
+
+namespace circle_eval_diff
+{
+
+void verifyTypeShape(const luci::CircleInput *input_node, const loco::DataType &dtype,
+                     const std::vector<loco::Dimension> &shape);
+
+} // namespace circle_eval_diff
+
+namespace circle_eval_diff
+{
+
+enum class InputFormat
+{
+  Undefined, // For debugging
+  H5,
+  DIR, // directory
+  // TODO Implement Random, Directory
+};
+
+class InputDataLoader
+{
+public:
+  using Data = std::vector<Tensor>;
+
+public:
+  virtual ~InputDataLoader() = default;
+
+public:
+  virtual uint32_t size(void) const = 0;
+
+public:
+  virtual Data get(uint32_t data_idx) const = 0;
+};
+
+class HDF5Loader final : public InputDataLoader
+{
+public:
+  HDF5Loader(const std::string &file_path, const std::vector<loco::Node *> &input_nodes);
+
+public:
+  uint32_t size(void) const final;
+  Data get(uint32_t data_idx) const final;
+
+private:
+  const std::vector<loco::Node *> _input_nodes;
+  std::unique_ptr<dio::hdf5::HDF5Importer> _hdf5;
+};
+
+// This class loads the directory that has raw data binary files.
+class DirectoryLoader final : public InputDataLoader
+{
+public:
+  DirectoryLoader(const std::string &dir_path, const std::vector<loco::Node *> &input_nodes);
+
+public:
+  uint32_t size(void) const final;
+  Data get(uint32_t data_idx) const final;
+
+private:
+  const std::vector<loco::Node *> _input_nodes;
+  std::vector<std::string> _data_paths;
+};
+
+std::unique_ptr<InputDataLoader> makeDataLoader(const std::string &file_path,
+                                                const InputFormat &format,
+                                                const std::vector<loco::Node *> &input_nodes);
+
+} // namespace circle_eval_diff
+
+#endif // __CIRCLE_EVAL_DIFF_INPUT_DATA_LOADER_H__
diff --git a/compiler/circle-eval-diff/src/InputDataLoader.test.cpp b/compiler/circle-eval-diff/src/InputDataLoader.test.cpp
new file mode 100644
index 000000000..cbe78797b
--- /dev/null
+++ b/compiler/circle-eval-diff/src/InputDataLoader.test.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <luci/IR/CircleNodes.h>
+
+#include "InputDataLoader.h"
+
+using namespace circle_eval_diff;
+
+TEST(CircleEvalInputDataLoaderTest, verifyTypeShapeTest)
+{
+  luci::CircleInput input;
+  input.dtype(loco::DataType::FLOAT32);
+  input.rank(4);
+  input.dim(0).set(1);
+  input.dim(1).set(3);
+  input.dim(2).set(3);
+  input.dim(3).set(2);
+
+  loco::DataType right_data_type{loco::DataType::FLOAT32};
+  std::vector<loco::Dimension> right_shape;
+  right_shape.emplace_back(1);
+  right_shape.emplace_back(3);
+  right_shape.emplace_back(3);
+  right_shape.emplace_back(2);
+
+  EXPECT_NO_THROW(verifyTypeShape(&input, right_data_type, right_shape));
+}
+
+TEST(CircleEvalInputDataLoaderTest, verifyTypeShapeTest_NEG)
+{
+  luci::CircleInput input;
+  input.dtype(loco::DataType::FLOAT32);
+  input.rank(4);
+  input.dim(0).set(1);
+  input.dim(1).set(4);
+  input.dim(2).set(4);
+  input.dim(3).set(2);
+
+  loco::DataType right_data_type{loco::DataType::FLOAT32};
+  loco::DataType wrong_data_type{loco::DataType::FLOAT16};
+  std::vector<loco::Dimension> wrong_shape;
+  wrong_shape.emplace_back(1);
+  wrong_shape.emplace_back(3);
+  wrong_shape.emplace_back(3);
+  wrong_shape.emplace_back(2);
+
+  EXPECT_ANY_THROW(verifyTypeShape(&input, right_data_type, wrong_shape));
+  EXPECT_ANY_THROW(verifyTypeShape(&input, wrong_data_type, wrong_shape));
+}
diff --git a/compiler/circle-eval-diff/src/MetricPrinter.cpp b/compiler/circle-eval-diff/src/MetricPrinter.cpp
new file mode 100644
index 000000000..ec8408471
--- /dev/null
+++ b/compiler/circle-eval-diff/src/MetricPrinter.cpp
@@ -0,0 +1,662 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MetricPrinter.h"
+
+#include <luci/IR/CircleNode.h>
+
+#include <limits>
+#include <iostream>
+#include <cassert>
+
+using Tensor = circle_eval_diff::Tensor;
+
+#define THROW_UNLESS(COND, MSG) \
+  if (not(COND))                \
+    throw std::runtime_error(MSG);
+
+namespace
+{
+
+uint32_t num_elems(const luci::CircleNode *node)
+{
+  uint32_t res = 1;
+
+  for (uint32_t i = 0; i < node->rank(); i++)
+    res *= node->dim(i).value();
+
+  return res;
+}
+
+template <typename T> bool same_shape(const T a, const T b)
+{
+  if (a->rank() != b->rank())
+    return false;
+
+  for (uint32_t i = 0; i < a->rank(); i++)
+  {
+    if (not(a->dim(i) == b->dim(i)))
+      return false;
+  }
+
+  return true;
+}
+
+template <typename T> bool same_dtype(const T a, const T b) { return a->dtype() == b->dtype(); }
+
+template <loco::DataType DT> std::shared_ptr<Tensor> to_fp32(const std::shared_ptr<Tensor> &tensor)
+{
+  assert(tensor->dtype() == DT); // FIX_CALLER_UNLESS
+
+  auto fp32_tensor = std::make_shared<Tensor>();
+  {
+    fp32_tensor->dtype(loco::DataType::FLOAT32);
+    fp32_tensor->rank(tensor->rank());
+    for (uint32_t i = 0; i < tensor->rank(); i++)
+      fp32_tensor->dim(i) = tensor->dim(i);
+
+    const auto num_elems = tensor->size<DT>();
+    fp32_tensor->size<loco::DataType::FLOAT32>(num_elems);
+    for (uint32_t i = 0; i < num_elems; i++)
+      fp32_tensor->at<loco::DataType::FLOAT32>(i) = static_cast<float>(tensor->at<DT>(i));
+  }
+  return fp32_tensor;
+}
+
+std::shared_ptr<Tensor> fp32(const std::shared_ptr<Tensor> &tensor)
+{
+  switch (tensor->dtype())
+  {
+    case loco::DataType::FLOAT32:
+      return tensor;
+    case loco::DataType::U8:
+      return to_fp32<loco::DataType::U8>(tensor);
+    case loco::DataType::S16:
+      return to_fp32<loco::DataType::S16>(tensor);
+    default:
+      throw std::runtime_error("Unsupported data type.");
+  }
+}
+
+} // namespace
+
+namespace circle_eval_diff
+{
+
+void MAEPrinter::init(const luci::Module *first, const luci::Module *second)
+{
+  THROW_UNLESS(first != nullptr, "Invalid module.");
+  THROW_UNLESS(second != nullptr, "Invalid module.");
+
+  const auto first_output = loco::output_nodes(first->graph());
+  const auto second_output = loco::output_nodes(second->graph());
+
+  assert(first_output.size() == second_output.size()); // FIX_CALLER_UNLESS
+
+  for (uint32_t i = 0; i < first_output.size(); i++)
+  {
+    const auto first_node = loco::must_cast<luci::CircleNode *>(first_output[i]);
+    const auto second_node = loco::must_cast<luci::CircleNode *>(second_output[i]);
+
+    // Create tensors to store intermediate results
+    _intermediate.emplace_back();
+    _intermediate.at(i).dtype(loco::DataType::FLOAT32);
+    // NOTE Use both first_node and second_node to avoid release build break
+    _intermediate.at(i).rank(first_node->rank());
+    uint32_t num_elems = 1;
+    for (uint32_t j = 0; j < second_node->rank(); j++)
+    {
+      _intermediate.at(i).dim(j) = second_node->dim(j);
+      num_elems *= second_node->dim(j).value();
+    }
+    _intermediate.at(i).size<loco::DataType::FLOAT32>(num_elems);
+
+    // Check the buffer is initilized with zero
+    for (uint32_t j = 0; j < num_elems; j++)
+      assert(_intermediate.at(i).at<loco::DataType::FLOAT32>(j) == 0.0);
+
+    // Save output names for logging
+    _output_names.emplace_back(first_node->name());
+  }
+}
+
+void MAEPrinter::accum_absolute_error(uint32_t output_idx, const std::shared_ptr<Tensor> &a,
+                                      const std::shared_ptr<Tensor> &b)
+{
+  assert(a->dtype() == loco::DataType::FLOAT32 and
+         b->dtype() == loco::DataType::FLOAT32); // FIX_CALLER_UNLESS
+  assert(same_shape(a.get(), b.get()));          // FIX_CALLER_UNLESS
+  assert(output_idx < _intermediate.size());     // FIX_CALLER_UNLESS
+
+  for (uint32_t i = 0; i < a->size<loco::DataType::FLOAT32>(); i++)
+  {
+    _intermediate.at(output_idx).at<loco::DataType::FLOAT32>(i) +=
+      std::abs(a->at<loco::DataType::FLOAT32>(i) - b->at<loco::DataType::FLOAT32>(i));
+  }
+}
+
+void MAEPrinter::accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+                            const std::vector<std::shared_ptr<Tensor>> &second)
+{
+  assert(first.size() == second.size());        // FIX_CALLER_UNLESS
+  assert(first.size() == _intermediate.size()); // FIX_CALLER_UNLESS
+
+  for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+  {
+    const auto first_output = first[output_idx];
+    const auto second_output = second[output_idx];
+
+    // Cast data to fp32 and then compute absolute error
+    const auto fp32_first_output = fp32(first_output);
+    const auto fp32_second_output = fp32(second_output);
+
+    accum_absolute_error(output_idx, fp32_first_output, fp32_second_output);
+  }
+
+  _num_data++;
+}
+
+void MAEPrinter::dump(std::ostream &os) const
+{
+  os << "Mean Absolute Error (MAE)" << std::endl;
+
+  for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+  {
+    const auto name = _output_names.at(output_idx);
+    const auto &inter = _intermediate.at(output_idx);
+    assert(inter.dtype() == loco::DataType::FLOAT32); // FIX_ME_UNLESS
+    const auto elem_count = inter.size<loco::DataType::FLOAT32>();
+
+    // Compute MAE
+    float mae = 0.0;
+    for (uint32_t elem_idx = 0; elem_idx < elem_count; elem_idx++)
+      mae += inter.at<loco::DataType::FLOAT32>(elem_idx);
+
+    mae = mae / elem_count;
+    mae = mae / _num_data;
+
+    os << "MAE for " << name << " is " << mae << std::endl;
+  }
+}
+
+// TODO Remove duplicate codes with MAEPrinter
+void MAPEPrinter::init(const luci::Module *first, const luci::Module *second)
+{
+  THROW_UNLESS(first != nullptr, "Invalid module.");
+  THROW_UNLESS(second != nullptr, "Invalid module.");
+
+  const auto first_output = loco::output_nodes(first->graph());
+  const auto second_output = loco::output_nodes(second->graph());
+
+  assert(first_output.size() == second_output.size()); // FIX_CALLER_UNLESS
+
+  for (uint32_t i = 0; i < first_output.size(); i++)
+  {
+    const auto first_node = loco::must_cast<luci::CircleNode *>(first_output[i]);
+    const auto second_node = loco::must_cast<luci::CircleNode *>(second_output[i]);
+
+    // Create tensors to store intermediate results
+    _intermediate.emplace_back();
+    _intermediate.at(i).dtype(loco::DataType::FLOAT32);
+    // NOTE Use both first_node and second_node to avoid release build break
+    _intermediate.at(i).rank(first_node->rank());
+    uint32_t num_elems = 1;
+    for (uint32_t j = 0; j < second_node->rank(); j++)
+    {
+      _intermediate.at(i).dim(j) = second_node->dim(j);
+      num_elems *= second_node->dim(j).value();
+    }
+    _intermediate.at(i).size<loco::DataType::FLOAT32>(num_elems);
+
+    // Check the buffer is initilized with zero
+    for (uint32_t j = 0; j < num_elems; j++)
+      assert(_intermediate.at(i).at<loco::DataType::FLOAT32>(j) == 0.0);
+
+    // Save output names for logging
+    _output_names.emplace_back(first_node->name());
+  }
+}
+
+// Accumulate |(a - b) / a|
+void MAPEPrinter::accum_mean_absolute_error(uint32_t output_idx, const std::shared_ptr<Tensor> &a,
+                                            const std::shared_ptr<Tensor> &b)
+{
+  assert(a->dtype() == loco::DataType::FLOAT32 and
+         b->dtype() == loco::DataType::FLOAT32); // FIX_CALLER_UNLESS
+  assert(same_shape(a.get(), b.get()));          // FIX_CALLER_UNLESS
+  assert(output_idx < _intermediate.size());     // FIX_CALLER_UNLESS
+
+  for (uint32_t i = 0; i < a->size<loco::DataType::FLOAT32>(); i++)
+  {
+    const auto a_val = a->at<loco::DataType::FLOAT32>(i);
+    const auto b_val = b->at<loco::DataType::FLOAT32>(i);
+    _intermediate.at(output_idx).at<loco::DataType::FLOAT32>(i) +=
+      std::abs((a_val - b_val) / a_val);
+  }
+}
+
+// Assumption
+// first: the result of fp32 model
+// second: the result of fake-quantized model
+void MAPEPrinter::accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+                             const std::vector<std::shared_ptr<Tensor>> &second)
+{
+  assert(first.size() == second.size());        // FIX_CALLER_UNLESS
+  assert(first.size() == _intermediate.size()); // FIX_CALLER_UNLESS
+
+  for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+  {
+    const auto first_output = first[output_idx];
+    const auto second_output = second[output_idx];
+
+    // Cast data to fp32 and then compute absolute error
+    const auto fp32_first_output = fp32(first_output);
+    const auto fp32_second_output = fp32(second_output);
+
+    accum_mean_absolute_error(output_idx, fp32_first_output, fp32_second_output);
+  }
+
+  _num_data++;
+}
+
+void MAPEPrinter::dump(std::ostream &os) const
+{
+  os << "Mean Absolute Percentage Error (MAPE)" << std::endl;
+
+  for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+  {
+    const auto name = _output_names.at(output_idx);
+    const auto &inter = _intermediate.at(output_idx);
+    assert(inter.dtype() == loco::DataType::FLOAT32); // FIX_ME_UNLESS
+    const auto elem_count = inter.size<loco::DataType::FLOAT32>();
+
+    // Compute MAPE
+    float mape = 0.0;
+    for (uint32_t elem_idx = 0; elem_idx < elem_count; elem_idx++)
+      mape += inter.at<loco::DataType::FLOAT32>(elem_idx);
+
+    mape = mape / elem_count;
+    mape = mape / _num_data;
+    mape *= 100.0;
+
+    os << "MAPE for " << name << " is " << mape << "%" << std::endl;
+  }
+}
+
+// TODO Remove duplicate codes with MAEPrinter
+void MPEIRPrinter::init(const luci::Module *first, const luci::Module *second)
+{
+  THROW_UNLESS(first != nullptr, "Invalid module.");
+  THROW_UNLESS(second != nullptr, "Invalid module.");
+
+  const auto first_output = loco::output_nodes(first->graph());
+  const auto second_output = loco::output_nodes(second->graph());
+
+  assert(first_output.size() == second_output.size()); // FIX_CALLER_UNLESS
+
+  for (uint32_t i = 0; i < first_output.size(); i++)
+  {
+    const auto first_node = loco::must_cast<luci::CircleOutput *>(first_output[i]);
+    const auto second_node = loco::must_cast<luci::CircleOutput *>(second_output[i]);
+
+    // Create places to store intermediate results
+    _intermediate.emplace_back(0.0);
+
+    // Save output names for logging
+    _output_names.emplace_back(first_node->name());
+  }
+}
+
+// Accumulate PEIR (Peak Error to Interval Ratio)
+// PEIR = max(|a - b|) / (max(a) - min(a))
+// PEIR >= 0 (lower is better)
+void MPEIRPrinter::accum_peir(uint32_t output_idx, const std::shared_ptr<Tensor> &a,
+                              const std::shared_ptr<Tensor> &b)
+{
+  assert(a->dtype() == loco::DataType::FLOAT32 and
+         b->dtype() == loco::DataType::FLOAT32); // FIX_CALLER_UNLESS
+  assert(same_shape(a.get(), b.get()));          // FIX_CALLER_UNLESS
+  assert(output_idx < _intermediate.size());     // FIX_CALLER_UNLESS
+
+  float min = std::numeric_limits<float>::max();
+  float max = std::numeric_limits<float>::lowest();
+
+  for (uint32_t i = 0; i < a->size<loco::DataType::FLOAT32>(); i++)
+  {
+    const auto a_val = a->at<loco::DataType::FLOAT32>(i);
+    min = std::min(a_val, min);
+    max = std::max(a_val, max);
+  }
+
+  float interval = max - min;
+
+  // Corner case: All values are the same. We set interval = 1 in this case
+  if (interval == 0)
+    interval = 1.0;
+
+  float peak_error = std::numeric_limits<float>::lowest();
+
+  for (uint32_t i = 0; i < a->size<loco::DataType::FLOAT32>(); i++)
+  {
+    const auto a_val = a->at<loco::DataType::FLOAT32>(i);
+    const auto b_val = b->at<loco::DataType::FLOAT32>(i);
+    const auto error = std::abs(a_val - b_val);
+    peak_error = std::max(error, peak_error);
+  }
+
+  _intermediate.at(output_idx) += peak_error / interval;
+}
+
+// Assumption (when testing the accuracy of quantized model)
+// first: the result of fp32 model
+// second: the result of fake-quantized model
+void MPEIRPrinter::accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+                              const std::vector<std::shared_ptr<Tensor>> &second)
+{
+  assert(first.size() == second.size());        // FIX_CALLER_UNLESS
+  assert(first.size() == _intermediate.size()); // FIX_CALLER_UNLESS
+
+  for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+  {
+    const auto first_output = first[output_idx];
+    const auto second_output = second[output_idx];
+
+    // Cast data to fp32 for ease of computation
+    const auto fp32_first_output = fp32(first_output);
+    const auto fp32_second_output = fp32(second_output);
+
+    accum_peir(output_idx, fp32_first_output, fp32_second_output);
+  }
+
+  _num_data++;
+}
+
+void MPEIRPrinter::dump(std::ostream &os) const
+{
+  os << "Mean Peak Error to Interval Ratio (MPEIR)" << std::endl;
+
+  for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+  {
+    const auto name = _output_names.at(output_idx);
+    const auto sum_of_peir = _intermediate.at(output_idx);
+
+    // Compute MPEIR
+    float mpeir = sum_of_peir / _num_data;
+
+    os << "MPEIR for " << name << " is " << mpeir << std::endl;
+  }
+}
+
+// TODO Remove duplicate codes with MAEPrinter
+void TopKMatchPrinter::init(const luci::Module *first, const luci::Module *second)
+{
+  THROW_UNLESS(first != nullptr, "Invalid module.");
+  THROW_UNLESS(second != nullptr, "Invalid module.");
+
+  const auto first_output = loco::output_nodes(first->graph());
+  const auto second_output = loco::output_nodes(second->graph());
+
+  assert(first_output.size() == second_output.size()); // FIX_CALLER_UNLESS
+
+  for (uint32_t i = 0; i < first_output.size(); i++)
+  {
+    const auto first_node = loco::must_cast<luci::CircleOutput *>(first_output[i]);
+    const auto second_node = loco::must_cast<luci::CircleOutput *>(second_output[i]);
+
+    // Create places to store intermediate results
+    _intermediate.emplace_back(0.0);
+
+    // Save output names for logging
+    _output_names.emplace_back(first_node->name());
+
+    // If num_elems of an output is less than k,
+    // the output index is added to the skip list
+    if (num_elems(first_node) < _k)
+    {
+      std::cout << "Top-" << _k << "metric for " << first_node->name()
+                << " is ignored, because it has elements less than " << _k << std::endl;
+      _skip_output.emplace_back(i);
+    }
+  }
+}
+
+void TopKMatchPrinter::accum_topk_accuracy(uint32_t output_idx, const std::shared_ptr<Tensor> &a,
+                                           const std::shared_ptr<Tensor> &b)
+{
+  assert(a->dtype() == loco::DataType::FLOAT32 and
+         b->dtype() == loco::DataType::FLOAT32); // FIX_CALLER_UNLESS
+  assert(same_shape(a.get(), b.get()));          // FIX_CALLER_UNLESS
+  assert(output_idx < _intermediate.size());     // FIX_CALLER_UNLESS
+
+  // Find Top-k largest elements
+  // This implementation is a variant of "Method 2 (Use temporary array)" in
+  // https://www.geeksforgeeks.org/k-largestor-smallest-elements-in-an-array/
+  // We sort top-k elements by value and index to ensure that the element with an earlier
+  // index comes first if multiple elements have the same value.
+  auto find_topk = [this](const std::shared_ptr<Tensor> &tensor) {
+    assert(_k <= tensor->size<loco::DataType::FLOAT32>()); // FIX_CALLER_UNLESS
+
+    // first: value, second: index
+    std::vector<std::pair<float, uint32_t>> topk;
+    topk.resize(_k);
+
+    // Initialize
+    for (uint32_t i = 0; i < _k; i++)
+    {
+      topk[i] = std::make_pair(tensor->at<loco::DataType::FLOAT32>(i), i);
+    }
+
+    // Input pair: (value, index)
+    // Return true if a has smaller value than b. If a and b have the same value,
+    // return true if a has larger index.
+    auto compare = [](const std::pair<float, uint32_t> &a, const std::pair<float, uint32_t> &b) {
+      if (a.first == b.first)
+        return a.second > b.second;
+
+      return a.first < b.first;
+    };
+
+    for (uint32_t i = _k; i < tensor->size<loco::DataType::FLOAT32>(); i++)
+    {
+      auto val = std::make_pair(tensor->at<loco::DataType::FLOAT32>(i), i);
+
+      auto min = std::min_element(topk.begin(), topk.end(), compare);
+      if (compare(*min, val))
+      {
+        // val is larger than min. Replace min with val.
+        auto min_index = std::distance(topk.begin(), min);
+        topk[min_index] = val;
+      }
+    }
+
+    return topk;
+  };
+
+  auto first_topk = find_topk(a);
+  auto second_topk = find_topk(b);
+
+  uint32_t matched = 0;
+  for (uint32_t i = 0; i < _k; i++)
+  {
+    for (uint32_t j = 0; j < _k; j++)
+    {
+      if (first_topk[i].second == second_topk[j].second)
+      {
+        matched++;
+        break;
+      }
+    }
+  }
+
+  float matched_ratio = static_cast<float>(matched) / _k;
+
+  _intermediate.at(output_idx) += matched_ratio;
+}
+
+bool TopKMatchPrinter::in_skip_list(uint32_t output_index) const
+{
+  for (auto skip : _skip_output)
+  {
+    if (output_index == skip)
+      return true;
+  }
+
+  return false;
+}
+
+void TopKMatchPrinter::accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+                                  const std::vector<std::shared_ptr<Tensor>> &second)
+{
+  assert(first.size() == second.size());        // FIX_CALLER_UNLESS
+  assert(first.size() == _intermediate.size()); // FIX_CALLER_UNLESS
+
+  for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+  {
+    if (in_skip_list(output_idx))
+      continue;
+
+    const auto first_output = first[output_idx];
+    const auto second_output = second[output_idx];
+
+    // Cast data to fp32 for ease of computation
+    const auto fp32_first_output = fp32(first_output);
+    const auto fp32_second_output = fp32(second_output);
+
+    accum_topk_accuracy(output_idx, fp32_first_output, fp32_second_output);
+  }
+
+  _num_data++;
+}
+
+void TopKMatchPrinter::dump(std::ostream &os) const
+{
+  os << "Ratio of Matched Indices between Top-" << _k << " results of the models" << std::endl;
+
+  for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+  {
+    if (in_skip_list(output_idx))
+      continue;
+
+    const auto name = _output_names.at(output_idx);
+    const auto sum_of_topk_accuracy = _intermediate.at(output_idx);
+
+    // Compute TopKMatch
+    float mean_topk = sum_of_topk_accuracy / _num_data;
+
+    os << "Mean Top-" << _k << " match ratio for " << name << " is " << mean_topk << std::endl;
+  }
+}
+
+void MSEPrinter::init(const luci::Module *first, const luci::Module *second)
+{
+  THROW_UNLESS(first != nullptr, "Invalid module.");
+  THROW_UNLESS(second != nullptr, "Invalid module.");
+
+  const auto first_output = loco::output_nodes(first->graph());
+  const auto second_output = loco::output_nodes(second->graph());
+
+  assert(first_output.size() == second_output.size()); // FIX_CALLER_UNLESS
+
+  for (uint32_t i = 0; i < first_output.size(); i++)
+  {
+    const auto first_node = loco::must_cast<luci::CircleNode *>(first_output[i]);
+    const auto second_node = loco::must_cast<luci::CircleNode *>(second_output[i]);
+
+    // Create tensors to store intermediate results
+    _intermediate.emplace_back();
+    _intermediate.at(i).dtype(loco::DataType::FLOAT32);
+    // NOTE Use both first_node and second_node to avoid release build break
+    _intermediate.at(i).rank(first_node->rank());
+    uint32_t num_elems = 1;
+    for (uint32_t j = 0; j < second_node->rank(); j++)
+    {
+      _intermediate.at(i).dim(j) = second_node->dim(j);
+      num_elems *= second_node->dim(j).value();
+    }
+    _intermediate.at(i).size<loco::DataType::FLOAT32>(num_elems);
+
+    // Check the buffer is initilized with zero
+    for (uint32_t j = 0; j < num_elems; j++)
+      assert(_intermediate.at(i).at<loco::DataType::FLOAT32>(j) == 0.0);
+
+    // Save output names for logging
+    _output_names.emplace_back(first_node->name());
+  }
+}
+
+void MSEPrinter::accum_squared_error(uint32_t output_idx, const std::shared_ptr<Tensor> &a,
+                                     const std::shared_ptr<Tensor> &b)
+{
+  assert(a->dtype() == loco::DataType::FLOAT32 and
+         b->dtype() == loco::DataType::FLOAT32); // FIX_CALLER_UNLESS
+  assert(same_shape(a.get(), b.get()));          // FIX_CALLER_UNLESS
+  assert(output_idx < _intermediate.size());     // FIX_CALLER_UNLESS
+
+  for (uint32_t i = 0; i < a->size<loco::DataType::FLOAT32>(); i++)
+  {
+    _intermediate.at(output_idx).at<loco::DataType::FLOAT32>(i) +=
+      (a->at<loco::DataType::FLOAT32>(i) - b->at<loco::DataType::FLOAT32>(i)) *
+      (a->at<loco::DataType::FLOAT32>(i) - b->at<loco::DataType::FLOAT32>(i));
+  }
+}
+
+void MSEPrinter::accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+                            const std::vector<std::shared_ptr<Tensor>> &second)
+{
+  assert(first.size() == second.size());        // FIX_CALLER_UNLESS
+  assert(first.size() == _intermediate.size()); // FIX_CALLER_UNLESS
+
+  for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+  {
+    const auto first_output = first[output_idx];
+    const auto second_output = second[output_idx];
+
+    // Cast data to fp32 and then compute absolute error
+    const auto fp32_first_output = fp32(first_output);
+    const auto fp32_second_output = fp32(second_output);
+
+    accum_squared_error(output_idx, fp32_first_output, fp32_second_output);
+  }
+
+  _num_data++;
+}
+
+void MSEPrinter::dump(std::ostream &os) const
+{
+  os << "Mean Squared Error (MSE)" << std::endl;
+
+  for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+  {
+    const auto name = _output_names.at(output_idx);
+    const auto &inter = _intermediate.at(output_idx);
+    assert(inter.dtype() == loco::DataType::FLOAT32); // FIX_ME_UNLESS
+    const auto elem_count = inter.size<loco::DataType::FLOAT32>();
+
+    // Compute MSE
+    float mse = 0.0;
+    for (uint32_t elem_idx = 0; elem_idx < elem_count; elem_idx++)
+      mse += inter.at<loco::DataType::FLOAT32>(elem_idx);
+
+    mse = mse / elem_count;
+    mse = mse / _num_data;
+
+    os << "MSE for " << name << " is " << mse << std::endl;
+  }
+}
+
+} // namespace circle_eval_diff
+
+#undef THROW_UNLESS
diff --git a/compiler/circle-eval-diff/src/MetricPrinter.h b/compiler/circle-eval-diff/src/MetricPrinter.h
new file mode 100644
index 000000000..c8f27511c
--- /dev/null
+++ b/compiler/circle-eval-diff/src/MetricPrinter.h
@@ -0,0 +1,217 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_EVAL_DIFF_METRIC_PRINTER_H__
+#define __CIRCLE_EVAL_DIFF_METRIC_PRINTER_H__
+
+#include <luci/IR/Module.h>
+
+#include "Tensor.h"
+
+#include <vector>
+#include <iostream>
+
+namespace circle_eval_diff
+{
+
+// Class to print metrics
+// How to use?
+//
+// MetricPrinter metric;
+// metric.init(first_module, second_module); // optional initialization
+//
+// for (..) // Evaluate data one by one
+// {
+//   ..
+//   metric.accumulate(first_result, second_result); // accumulate results
+// }
+//
+// std::cout << &metric << std::endl; // print result
+class MetricPrinter
+{
+public:
+  virtual ~MetricPrinter() = default;
+
+  // Child class can implement this function if necessary
+  // NOTE init can be skipped
+  virtual void init(const luci::Module *, const luci::Module *) {}
+
+  // Accumulate results of comparing the first and the second model's outputs
+  virtual void accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+                          const std::vector<std::shared_ptr<Tensor>> &second) = 0;
+
+  // Dump the final result of the corresponding metric
+  virtual void dump(std::ostream &os) const = 0;
+};
+
+static inline std::ostream &operator<<(std::ostream &os, const MetricPrinter *m)
+{
+  m->dump(os);
+  return os;
+}
+
+// Mean Absolute Error
+class MAEPrinter final : public MetricPrinter
+{
+public:
+  void init(const luci::Module *first, const luci::Module *second);
+
+  void accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+                  const std::vector<std::shared_ptr<Tensor>> &second);
+
+  void dump(std::ostream &os) const;
+
+private:
+  void accum_absolute_error(uint32_t index, const std::shared_ptr<Tensor> &a,
+                            const std::shared_ptr<Tensor> &b);
+
+private:
+  // Store accumulated sum of absolute error for each output
+  std::vector<Tensor> _intermediate;
+  std::vector<std::string> _output_names;
+  uint32_t _num_data = 0;
+};
+
+// Mean Squared Error
+class MSEPrinter final : public MetricPrinter
+{
+public:
+  void init(const luci::Module *first, const luci::Module *second);
+
+  void accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+                  const std::vector<std::shared_ptr<Tensor>> &second);
+
+  void dump(std::ostream &os) const;
+
+private:
+  void accum_squared_error(uint32_t index, const std::shared_ptr<Tensor> &a,
+                           const std::shared_ptr<Tensor> &b);
+
+private:
+  // Store accumulated sum of absolute error for each output
+  std::vector<Tensor> _intermediate;
+  std::vector<std::string> _output_names;
+  uint32_t _num_data = 0;
+};
+
+// Mean Absolute Percentage Error
+class MAPEPrinter final : public MetricPrinter
+{
+public:
+  void init(const luci::Module *first, const luci::Module *second);
+
+  void accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+                  const std::vector<std::shared_ptr<Tensor>> &second);
+
+  void dump(std::ostream &os) const;
+
+private:
+  void accum_mean_absolute_error(uint32_t index, const std::shared_ptr<Tensor> &a,
+                                 const std::shared_ptr<Tensor> &b);
+
+private:
+  // Store accumulated sum of absolute error for each output
+  std::vector<Tensor> _intermediate;
+  std::vector<std::string> _output_names;
+  uint32_t _num_data = 0;
+};
+
+// Mean Peak Error to Interval Ratio (PEIR)
+// PEIR = max(|a - b|) / (max(a) - min(a))
+// PEIR >= 0 (lower is better)
+//
+// When testing the accuracy of quantized model,
+// the first model should be the original fp32 model, and
+// the second model should be the fake-quantized fp32 model
+class MPEIRPrinter final : public MetricPrinter
+{
+public:
+  void init(const luci::Module *first, const luci::Module *second);
+
+  void accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+                  const std::vector<std::shared_ptr<Tensor>> &second);
+
+  void dump(std::ostream &os) const;
+
+private:
+  void accum_peir(uint32_t index, const std::shared_ptr<Tensor> &a,
+                  const std::shared_ptr<Tensor> &b);
+
+private:
+  // Store accumulated sum of PEIR for each output
+  std::vector<float> _intermediate;
+  std::vector<std::string> _output_names;
+  uint32_t _num_data = 0;
+};
+
+// Ratio of matched indices between top-k results of two models (a, b).
+//
+// top-k match = intersection(top_k_idx(a), top_k_idx(b)) / k
+// mean top-k match = sum(top-k match) / num_data
+//
+// For example,
+// num_data = 2
+// first model output = [1, 2, 3], [2, 3, 1]
+// second model output = [2, 4, 6], [3, 2, 1]
+//
+// if k = 1,
+// first model top-1 index = ([2], [1])
+// second model top-1 index = ([2], [0])
+// mean top-1 accuracy = (1 + 0) / 2 = 0.5
+//
+// if k = 2,
+// first model output = [1, 2, 3], [2, 3, 1]
+// second model output = [2, 4, 6], [3, 2, 1]
+// first model top-2 index = ([2, 1], [1, 0])
+// second model top-2 index = ([2, 1], [0, 1])
+// mean top-2 accuracy = (2 + 2) / 4 = 1
+//
+// NOTE Order of elements is ignored when comparing two top-k sets.
+// NOTE If two elements have the same value and only one can be included in top-k,
+// the one with an earlier index will be included.
+class TopKMatchPrinter : public MetricPrinter
+{
+public:
+  TopKMatchPrinter(uint32_t k) : _k(k) {}
+
+public:
+  void init(const luci::Module *first, const luci::Module *second);
+
+  void accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+                  const std::vector<std::shared_ptr<Tensor>> &second);
+
+  void dump(std::ostream &os) const;
+
+private:
+  void accum_topk_accuracy(uint32_t index, const std::shared_ptr<Tensor> &a,
+                           const std::shared_ptr<Tensor> &b);
+
+  // Return true if the output is in the skip list (_skip_output)
+  bool in_skip_list(uint32_t output_index) const;
+
+private:
+  const uint32_t _k = 0;
+  // Store accumulated accuracy
+  std::vector<float> _intermediate;
+  std::vector<std::string> _output_names;
+  uint32_t _num_data = 0;
+  // Save index of output whose num_elements is less than k
+  std::vector<uint32_t> _skip_output;
+};
+
+} // namespace circle_eval_diff
+
+#endif // __CIRCLE_EVAL_DIFF_METRIC_PRINTER_H__
diff --git a/compiler/circle-eval-diff/src/MetricPrinter.test.cpp b/compiler/circle-eval-diff/src/MetricPrinter.test.cpp
new file mode 100644
index 000000000..0e71b80cc
--- /dev/null
+++ b/compiler/circle-eval-diff/src/MetricPrinter.test.cpp
@@ -0,0 +1,548 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MetricPrinter.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+using Tensor = circle_eval_diff::Tensor;
+
+namespace
+{
+
+// TODO Reduce duplicate codes in ResolveCustomOpMatMulPass.cpp
+template <typename T>
+luci::CircleConst *create_const_node(loco::Graph *g, const loco::DataType dtype,
+                                     const std::vector<uint32_t> &shape,
+                                     const std::vector<T> &values)
+{
+  auto node = g->nodes()->create<luci::CircleConst>();
+  node->dtype(dtype);
+  node->rank(shape.size());
+
+  uint32_t size = 1;
+  for (uint32_t i = 0; i < shape.size(); ++i)
+  {
+    node->dim(i) = shape.at(i);
+    size *= shape.at(i);
+  }
+  node->shape_status(luci::ShapeStatus::VALID);
+
+#define INIT_VALUES(DT)                          \
+  {                                              \
+    node->size<DT>(size);                        \
+    for (uint32_t i = 0; i < values.size(); ++i) \
+      node->at<DT>(i) = values[i];               \
+  }
+
+  switch (dtype)
+  {
+    case loco::DataType::U8:
+      INIT_VALUES(loco::DataType::U8);
+      break;
+    case loco::DataType::S16:
+      INIT_VALUES(loco::DataType::S16);
+      break;
+    case loco::DataType::S32:
+      INIT_VALUES(loco::DataType::S32);
+      break;
+    case loco::DataType::FLOAT32:
+      INIT_VALUES(loco::DataType::FLOAT32)
+      break;
+    default:
+      INTERNAL_EXN("create_const_node called with unsupported type");
+      break;
+  }
+  return node;
+}
+
+/**
+ *  Simple graph which adds constant (addition) to the input
+ *
+ *  [Input] [Const] (addition)
+ *      \   /
+ *      [Add]
+ *
+ */
+class AddGraphlet
+{
+public:
+  AddGraphlet() = default;
+
+  void init(loco::Graph *g, float addition)
+  {
+    std::vector<float> addition_val;
+    for (uint32_t i = 0; i < 16; i++)
+      addition_val.push_back(addition);
+    _add_c = create_const_node(g, loco::DataType::FLOAT32, {1, 16}, addition_val);
+
+    _add = g->nodes()->create<luci::CircleAdd>();
+    _add->y(_add_c);
+    _add->fusedActivationFunction(luci::FusedActFunc::NONE);
+    _add->dtype(loco::DataType::FLOAT32);
+    _add->shape({1, 16});
+    _add->name("add");
+  }
+
+protected:
+  luci::CircleAdd *_add = nullptr;
+  luci::CircleConst *_add_c = nullptr;
+};
+
+class AddOneGraph : public luci::test::TestIOGraph, public AddGraphlet
+{
+public:
+  AddOneGraph() = default;
+
+  void init(void)
+  {
+    luci::test::TestIOGraph::init({1, 4}, {1, 16});
+    AddGraphlet::init(g(), 1.0);
+
+    _add->x(input());
+
+    output()->from(_add);
+  }
+
+  std::unique_ptr<loco::Graph> graph(void) { return std::move(_g); }
+};
+
+class AddTwoGraph : public luci::test::TestIOGraph, public AddGraphlet
+{
+public:
+  AddTwoGraph() = default;
+
+  void init(void)
+  {
+    luci::test::TestIOGraph::init({1, 4}, {1, 16});
+    AddGraphlet::init(g(), 2.0);
+
+    _add->x(input());
+
+    output()->from(_add);
+  }
+
+  std::unique_ptr<loco::Graph> graph(void) { return std::move(_g); }
+};
+
+// Return number of elements of the node.
+uint32_t numElements(const luci::CircleNode *node)
+{
+  uint32_t num_elem = 1;
+  for (uint32_t i = 0; i < node->rank(); ++i)
+    num_elem *= node->dim(i).value();
+  return num_elem;
+}
+
+// Return Tensor which has the same dtype and shape with node.
+// Buffer does not have any data yet.
+std::shared_ptr<Tensor> create_empty_tensor(const luci::CircleNode *node)
+{
+  auto tensor = std::make_shared<Tensor>();
+  {
+    tensor->dtype(node->dtype());
+    tensor->rank(node->rank());
+    for (uint32_t i = 0; i < node->rank(); i++)
+      tensor->dim(i) = node->dim(i);
+    tensor->size<loco::DataType::FLOAT32>(numElements(node));
+  }
+
+  return tensor;
+}
+
+std::shared_ptr<Tensor> output_tensor_with_value(const luci::Module *module, float value)
+{
+  auto outputs = loco::output_nodes(module->graph());
+  assert(outputs.size() == 1);
+  auto output = *outputs.begin();
+  auto output_cnode = loco::must_cast<luci::CircleNode *>(output);
+  auto tensor = create_empty_tensor(output_cnode);
+  auto tensor_size = tensor->size<loco::DataType::FLOAT32>();
+  for (uint32_t i = 0; i < tensor_size; i++)
+  {
+    tensor->at<loco::DataType::FLOAT32>(i) = value;
+  }
+  return tensor;
+}
+
+std::shared_ptr<Tensor> output_tensor_with_value(const luci::Module *module,
+                                                 std::vector<float> &value)
+{
+  auto outputs = loco::output_nodes(module->graph());
+  assert(outputs.size() == 1);
+  auto output = *outputs.begin();
+  auto output_cnode = loco::must_cast<luci::CircleNode *>(output);
+  auto tensor = create_empty_tensor(output_cnode);
+  auto tensor_size = tensor->size<loco::DataType::FLOAT32>();
+  assert(tensor_size == value.size());
+  for (uint32_t i = 0; i < tensor_size; i++)
+  {
+    tensor->at<loco::DataType::FLOAT32>(i) = value[i];
+  }
+  return tensor;
+}
+
+} // namespace
+
+namespace circle_eval_diff
+{
+
+TEST(CircleEvalMetricPrinterTest, MAE_simple)
+{
+  luci::Module first;
+  AddOneGraph first_g;
+  first_g.init();
+
+  first.add(std::move(first_g.graph()));
+
+  luci::Module second;
+  AddTwoGraph second_g;
+  second_g.init();
+
+  second.add(std::move(second_g.graph()));
+
+  MAEPrinter mae;
+
+  mae.init(&first, &second);
+
+  // This test does not actually evaluate the modules, but create
+  // fake results.
+  std::vector<std::shared_ptr<Tensor>> first_result;
+  {
+    auto output = output_tensor_with_value(&first, 1.0);
+    first_result.emplace_back(output);
+  }
+
+  std::vector<std::shared_ptr<Tensor>> second_result;
+  {
+    auto output = output_tensor_with_value(&second, 2.0);
+    second_result.emplace_back(output);
+  }
+
+  mae.accumulate(first_result, second_result);
+
+  std::stringstream ss;
+  mae.dump(ss);
+  std::string result = ss.str();
+
+  EXPECT_NE(std::string::npos, result.find("MAE for output_0 is 1"));
+}
+
+TEST(CircleEvalMetricPrinterTest, MAE_init_with_null_NEG)
+{
+  MAEPrinter mae;
+
+  EXPECT_ANY_THROW(mae.init(nullptr, nullptr));
+}
+
+TEST(CircleEvalMetricPrinterTest, MAPE_simple)
+{
+  luci::Module first;
+  AddOneGraph first_g;
+  first_g.init();
+
+  first.add(std::move(first_g.graph()));
+
+  luci::Module second;
+  AddTwoGraph second_g;
+  second_g.init();
+
+  second.add(std::move(second_g.graph()));
+
+  MAPEPrinter mape;
+
+  mape.init(&first, &second);
+
+  // This test does not actually evaluate the modules, but create
+  // fake results.
+  std::vector<std::shared_ptr<Tensor>> first_result;
+  {
+    auto output = output_tensor_with_value(&first, 2.0);
+    first_result.emplace_back(output);
+  }
+
+  std::vector<std::shared_ptr<Tensor>> second_result;
+  {
+    auto output = output_tensor_with_value(&second, 1.0);
+    second_result.emplace_back(output);
+  }
+
+  mape.accumulate(first_result, second_result);
+
+  std::stringstream ss;
+  mape.dump(ss);
+  std::string result = ss.str();
+
+  EXPECT_NE(std::string::npos, result.find("MAPE for output_0 is 50%"));
+}
+
+TEST(CircleEvalMetricPrinterTest, MAPE_init_with_null_NEG)
+{
+  MAPEPrinter mape;
+
+  EXPECT_ANY_THROW(mape.init(nullptr, nullptr));
+}
+
+TEST(CircleEvalMetricPrinterTest, MPEIR_simple)
+{
+  luci::Module first;
+  AddOneGraph first_g;
+  first_g.init();
+
+  first.add(std::move(first_g.graph()));
+
+  luci::Module second;
+  AddTwoGraph second_g;
+  second_g.init();
+
+  second.add(std::move(second_g.graph()));
+
+  MPEIRPrinter mpeir;
+
+  mpeir.init(&first, &second);
+
+  // This test does not actually evaluate the modules, but create
+  // fake results.
+  std::vector<std::shared_ptr<Tensor>> first_result;
+  {
+    std::vector<float> val;
+    val.resize(16);
+    for (uint32_t i = 0; i < 16; i++)
+      val[i] = i;
+
+    auto output = output_tensor_with_value(&first, val);
+    first_result.emplace_back(output);
+  }
+
+  std::vector<std::shared_ptr<Tensor>> second_result;
+  {
+    auto output = output_tensor_with_value(&second, 0.0);
+    second_result.emplace_back(output);
+  }
+
+  mpeir.accumulate(first_result, second_result);
+
+  std::stringstream ss;
+  mpeir.dump(ss);
+  std::string result = ss.str();
+
+  EXPECT_NE(std::string::npos, result.find("MPEIR for output_0 is 1"));
+}
+
+TEST(CircleEvalMetricPrinterTest, MPEIR_init_with_null_NEG)
+{
+  MPEIRPrinter mpeir;
+
+  EXPECT_ANY_THROW(mpeir.init(nullptr, nullptr));
+}
+
+TEST(CircleEvalMetricPrinterTest, TopK_simple)
+{
+  luci::Module first;
+  AddOneGraph first_g;
+  first_g.init();
+
+  first.add(std::move(first_g.graph()));
+
+  luci::Module second;
+  AddTwoGraph second_g;
+  second_g.init();
+
+  second.add(std::move(second_g.graph()));
+
+  TopKMatchPrinter top5(5);
+
+  top5.init(&first, &second);
+
+  // This test does not actually evaluate the modules, but create
+  // fake results.
+  std::vector<std::shared_ptr<Tensor>> first_result;
+  {
+    std::vector<float> val;
+    val.resize(16);
+    for (uint32_t i = 0; i < 16; i++)
+      val[i] = i;
+
+    auto output = output_tensor_with_value(&first, val);
+    first_result.emplace_back(output);
+  }
+
+  std::vector<std::shared_ptr<Tensor>> second_result;
+  {
+    std::vector<float> val;
+    val.resize(16);
+    for (uint32_t i = 0; i < 16; i++)
+      val[i] = i * 2;
+    auto output = output_tensor_with_value(&second, val);
+    second_result.emplace_back(output);
+  }
+
+  top5.accumulate(first_result, second_result);
+
+  std::stringstream ss;
+  top5.dump(ss);
+  std::string result = ss.str();
+
+  EXPECT_NE(std::string::npos, result.find("Mean Top-5 match ratio for output_0 is 1"));
+}
+
+TEST(CircleEvalMetricPrinterTest, TopK_tie)
+{
+  luci::Module first;
+  AddOneGraph first_g;
+  first_g.init();
+
+  first.add(std::move(first_g.graph()));
+
+  luci::Module second;
+  AddTwoGraph second_g;
+  second_g.init();
+
+  second.add(std::move(second_g.graph()));
+
+  TopKMatchPrinter top5(5);
+
+  top5.init(&first, &second);
+
+  // This test does not actually evaluate the modules, but create
+  // fake results.
+  std::vector<std::shared_ptr<Tensor>> first_result;
+  {
+    std::vector<float> val;
+    val.resize(16);
+    for (uint32_t i = 0; i < 16; i++)
+      val[i] = i;
+
+    auto output = output_tensor_with_value(&first, val);
+    first_result.emplace_back(output);
+  }
+
+  std::vector<std::shared_ptr<Tensor>> second_result;
+  {
+    std::vector<float> val{12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 14, 15, 16};
+
+    auto output = output_tensor_with_value(&second, val);
+    second_result.emplace_back(output);
+  }
+
+  top5.accumulate(first_result, second_result);
+
+  std::stringstream ss;
+  top5.dump(ss);
+  std::string result = ss.str();
+
+  EXPECT_NE(std::string::npos, result.find("Mean Top-5 match ratio for output_0 is 0.8"));
+}
+
+TEST(CircleEvalMetricPrinterTest, TopK_num_elem_less_than_k_NEG)
+{
+  luci::Module first;
+  AddOneGraph first_g;
+  first_g.init();
+
+  first.add(std::move(first_g.graph()));
+
+  luci::Module second;
+  AddTwoGraph second_g;
+  second_g.init();
+
+  second.add(std::move(second_g.graph()));
+
+  TopKMatchPrinter top100(100);
+
+  top100.init(&first, &second);
+
+  // This test does not actually evaluate the modules, but create
+  // fake results.
+  std::vector<std::shared_ptr<Tensor>> first_result;
+  {
+    auto output = output_tensor_with_value(&first, 0);
+    first_result.emplace_back(output);
+  }
+
+  std::vector<std::shared_ptr<Tensor>> second_result;
+  {
+    auto output = output_tensor_with_value(&second, 0);
+    second_result.emplace_back(output);
+  }
+
+  top100.accumulate(first_result, second_result);
+
+  std::stringstream ss;
+  top100.dump(ss);
+  std::string result = ss.str();
+
+  EXPECT_EQ(std::string::npos, result.find("Mean Top-100 match ratio"));
+}
+
+TEST(CircleEvalMetricPrinterTest, TopK_init_with_null_NEG)
+{
+  TopKMatchPrinter topk(5);
+
+  EXPECT_ANY_THROW(topk.init(nullptr, nullptr));
+}
+
+TEST(CircleEvalMetricPrinterTest, MSE_simple)
+{
+  luci::Module first;
+  AddOneGraph first_g;
+  first_g.init();
+
+  first.add(std::move(first_g.graph()));
+
+  luci::Module second;
+  AddTwoGraph second_g;
+  second_g.init();
+
+  second.add(std::move(second_g.graph()));
+
+  MSEPrinter mse;
+
+  mse.init(&first, &second);
+
+  // This test does not actually evaluate the modules, but create
+  // fake results.
+  std::vector<std::shared_ptr<Tensor>> first_result;
+  {
+    auto output = output_tensor_with_value(&first, 1.0);
+    first_result.emplace_back(output);
+  }
+
+  std::vector<std::shared_ptr<Tensor>> second_result;
+  {
+    auto output = output_tensor_with_value(&second, 2.0);
+    second_result.emplace_back(output);
+  }
+
+  mse.accumulate(first_result, second_result);
+
+  std::stringstream ss;
+  mse.dump(ss);
+  std::string result = ss.str();
+
+  EXPECT_NE(std::string::npos, result.find("MSE for output_0 is 1"));
+}
+
+TEST(CircleEvalMetricPrinterTest, MSE_init_with_null_NEG)
+{
+  MSEPrinter mse;
+
+  EXPECT_ANY_THROW(mse.init(nullptr, nullptr));
+}
+
+} // namespace circle_eval_diff
diff --git a/compiler/circle-eval-diff/src/Tensor.cpp b/compiler/circle-eval-diff/src/Tensor.cpp
new file mode 100644
index 000000000..c3efc44cd
--- /dev/null
+++ b/compiler/circle-eval-diff/src/Tensor.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Tensor.h"
+
+#include <luci/IR/CircleNodeDecl.h>
+
+#include <cassert>
+
+namespace
+{
+
+// Return number of elements of the node.
+uint32_t numElements(const luci::CircleNode *node)
+{
+  uint32_t num_elem = 1;
+  for (uint32_t i = 0; i < node->rank(); ++i)
+    num_elem *= node->dim(i).value();
+  return num_elem;
+}
+
+} // namespace
+
+namespace circle_eval_diff
+{
+
+#define THROW_UNLESS(COND, MSG) \
+  if (not(COND))                \
+    throw std::runtime_error(MSG);
+
+template <loco::DataType DT> uint32_t Tensor::size(void) const
+{
+  assert(dtype() == DT);
+  assert(_data.size() % sizeof(typename loco::DataTypeImpl<DT>::Type) == 0);
+  return _data.size() / sizeof(typename loco::DataTypeImpl<DT>::Type);
+}
+
+template <loco::DataType DT> void Tensor::size(uint32_t l)
+{
+  assert(dtype() == DT);
+  _data.resize(l * sizeof(typename loco::DataTypeImpl<DT>::Type));
+}
+
+template <loco::DataType DT>
+const typename loco::DataTypeImpl<DT>::Type &Tensor::at(uint32_t n) const
+{
+  assert(dtype() == DT);
+  THROW_UNLESS(n < size<DT>(), "Access to out of buffer boundary.");
+  return *(reinterpret_cast<const typename loco::DataTypeImpl<DT>::Type *>(_data.data()) + n);
+}
+
+template <loco::DataType DT> typename loco::DataTypeImpl<DT>::Type &Tensor::at(uint32_t n)
+{
+  assert(dtype() == DT);
+  THROW_UNLESS(n < size<DT>(), "Access to out of buffer boundary.");
+  return *(reinterpret_cast<typename loco::DataTypeImpl<DT>::Type *>(_data.data()) + n);
+}
+
+#undef THROW_UNLESS
+
+#define INSTANTIATE(DT)                                                                 \
+  template uint32_t Tensor::size<DT>(void) const;                                       \
+  template void Tensor::size<DT>(uint32_t);                                             \
+  template const typename loco::DataTypeImpl<DT>::Type &Tensor::at<DT>(uint32_t) const; \
+  template typename loco::DataTypeImpl<DT>::Type &Tensor::at<DT>(uint32_t);
+
+INSTANTIATE(loco::DataType::S64);
+INSTANTIATE(loco::DataType::S32);
+INSTANTIATE(loco::DataType::S16);
+INSTANTIATE(loco::DataType::U8);
+INSTANTIATE(loco::DataType::FLOAT32);
+
+#undef INSTANTIATE
+
+// Return Tensor which has the same dtype and shape with node.
+// Buffer does not have any data yet.
+std::shared_ptr<Tensor> createEmptyTensor(const luci::CircleNode *node)
+{
+  auto tensor = std::make_shared<Tensor>();
+  {
+    tensor->dtype(node->dtype());
+    tensor->rank(node->rank());
+    for (uint32_t i = 0; i < node->rank(); i++)
+      tensor->dim(i) = node->dim(i);
+
+    switch (node->dtype())
+    {
+      case loco::DataType::FLOAT32:
+        tensor->size<loco::DataType::FLOAT32>(numElements(node));
+        break;
+      case loco::DataType::U8:
+        tensor->size<loco::DataType::U8>(numElements(node));
+        break;
+      case loco::DataType::S16:
+        tensor->size<loco::DataType::S16>(numElements(node));
+        break;
+      case loco::DataType::S32:
+        tensor->size<loco::DataType::S32>(numElements(node));
+        break;
+      case loco::DataType::S64:
+        tensor->size<loco::DataType::S64>(numElements(node));
+        break;
+      default:
+        throw std::runtime_error("Unsupported input tensor dtype for " + node->name());
+    }
+  }
+
+  return tensor;
+}
+
+} // namespace circle_eval_diff
diff --git a/compiler/circle-eval-diff/src/Tensor.h b/compiler/circle-eval-diff/src/Tensor.h
new file mode 100644
index 000000000..d4f65d951
--- /dev/null
+++ b/compiler/circle-eval-diff/src/Tensor.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_EVAL_DIFF_TENSOR_H__
+#define __CIRCLE_EVAL_DIFF_TENSOR_H__
+
+#include <loco.h>
+#include <luci/IR/CircleNodeDecl.h>
+
+#include <vector>
+
+namespace circle_eval_diff
+{
+
+struct TensorDataType
+{
+public:
+  const loco::DataType &dtype(void) const { return _dtype; }
+  void dtype(const loco::DataType &dtype) { _dtype = dtype; }
+
+private:
+  loco::DataType _dtype = loco::DataType::Unknown;
+};
+
+struct TensorShape
+{
+public:
+  uint32_t rank(void) const { return _dims.size(); }
+  void rank(uint32_t value) { _dims.resize(value); }
+
+  const loco::Dimension &dim(uint32_t axis) const { return _dims.at(axis); }
+  loco::Dimension &dim(uint32_t axis) { return _dims.at(axis); }
+
+  void shape(std::initializer_list<uint32_t> dims)
+  {
+    rank(dims.size());
+
+    uint32_t axis = 0;
+    for (auto d : dims)
+    {
+      dim(axis++) = d;
+    }
+  }
+
+private:
+  std::vector<loco::Dimension> _dims;
+};
+
+// Tensor has three kinds of data
+// 1. DataType (_dtype)
+// 2. Shape (_dims)
+// 3. Buffer (_data)
+struct Tensor final : public TensorShape, public TensorDataType
+{
+public:
+  template <loco::DataType DT> uint32_t size(void) const;
+  template <loco::DataType DT> void size(uint32_t size);
+  template <loco::DataType DT> const typename loco::DataTypeImpl<DT>::Type &at(uint32_t n) const;
+  template <loco::DataType DT> typename loco::DataTypeImpl<DT>::Type &at(uint32_t n);
+  uint8_t *buffer(void) { return _data.data(); }
+  uint32_t byte_size(void) const { return _data.size(); }
+
+private:
+  std::vector<uint8_t> _data;
+};
+
+std::shared_ptr<Tensor> createEmptyTensor(const luci::CircleNode *node);
+
+} // namespace circle_eval_diff
+
+#endif // __CIRCLE_EVAL_DIFF_TENSOR_H__
diff --git a/compiler/circle-eval-diff/src/Tensor.test.cpp b/compiler/circle-eval-diff/src/Tensor.test.cpp
new file mode 100644
index 000000000..395865748
--- /dev/null
+++ b/compiler/circle-eval-diff/src/Tensor.test.cpp
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Tensor.h"
+
+#include <gtest/gtest.h>
+
+#include <luci/IR/CircleNodes.h>
+
+using Tensor = circle_eval_diff::Tensor;
+
+namespace
+{
+
+template <loco::DataType DT> void test_out_of_buffer_range()
+{
+  Tensor t;
+
+  t.shape({1, 2, 3});
+  t.dtype(DT);
+  t.size<DT>(6);
+
+  EXPECT_ANY_THROW(t.at<DT>(6));
+}
+
+template <loco::DataType DT> void test_getter_setter()
+{
+  Tensor t;
+
+  // Check shape
+  t.shape({1, 2, 3});
+  EXPECT_EQ(3, t.rank());
+  EXPECT_EQ(1, t.dim(0));
+  EXPECT_EQ(2, t.dim(1));
+  EXPECT_EQ(3, t.dim(2));
+
+  // Check dtype
+  t.dtype(DT);
+  EXPECT_EQ(DT, t.dtype());
+
+  // Check buffer
+  t.size<DT>(6);
+  EXPECT_EQ(6 * sizeof(typename loco::DataTypeImpl<DT>::Type), t.byte_size());
+  for (uint32_t i = 0; i < 6; i++)
+    t.at<DT>(i) = i;
+
+  for (uint32_t i = 0; i < 6; i++)
+    EXPECT_EQ(i, t.at<DT>(i));
+}
+
+} // namespace
+
+TEST(CircleEvalDiffTensorTest, constructor)
+{
+  Tensor t;
+
+  EXPECT_EQ(0, t.byte_size());
+  EXPECT_EQ(0, t.rank());
+  EXPECT_EQ(loco::DataType::Unknown, t.dtype());
+}
+
+TEST(CircleEvalDiffTensorTest, getter_setter)
+{
+  test_getter_setter<loco::DataType::S64>();
+  test_getter_setter<loco::DataType::S32>();
+  test_getter_setter<loco::DataType::S16>();
+  test_getter_setter<loco::DataType::U8>();
+  test_getter_setter<loco::DataType::FLOAT32>();
+
+  SUCCEED();
+}
+
+TEST(CircleEvalDiffTensorTest, out_of_shape_range_NEG)
+{
+  Tensor t;
+  t.shape({1, 2, 2, 3});
+
+  EXPECT_ANY_THROW(t.dim(4));
+}
+
+TEST(CircleEvalDiffTensorTest, out_of_buffer_range_NEG)
+{
+  test_out_of_buffer_range<loco::DataType::S64>();
+  test_out_of_buffer_range<loco::DataType::S32>();
+  test_out_of_buffer_range<loco::DataType::S16>();
+  test_out_of_buffer_range<loco::DataType::U8>();
+  test_out_of_buffer_range<loco::DataType::FLOAT32>();
+
+  SUCCEED();
+}
+
+TEST(CircleEvalDiffTensorTest, createEmptyTensorTest)
+{
+  luci::CircleInput input;
+  input.dtype(loco::DataType::FLOAT32);
+  input.rank(4);
+  input.dim(0).set(1);
+  input.dim(1).set(3);
+  input.dim(2).set(3);
+  input.dim(3).set(2);
+
+  loco::DataType right_data_type{loco::DataType::FLOAT32};
+  std::vector<loco::Dimension> right_shape;
+  right_shape.emplace_back(1);
+  right_shape.emplace_back(3);
+  right_shape.emplace_back(3);
+  right_shape.emplace_back(2);
+
+  auto tensor = circle_eval_diff::createEmptyTensor(&input);
+  EXPECT_EQ(loco::DataType::FLOAT32, tensor->dtype());
+  EXPECT_EQ(4, tensor->rank());
+  EXPECT_EQ(1, tensor->dim(0));
+  EXPECT_EQ(3, tensor->dim(1));
+  EXPECT_EQ(3, tensor->dim(2));
+  EXPECT_EQ(2, tensor->dim(3));
+}