Diffstat (limited to 'tests/tools')
23 files changed, 2278 insertions, 0 deletions
diff --git a/tests/tools/CMakeLists.txt b/tests/tools/CMakeLists.txt
new file mode 100644
index 000000000..b1eea12f9
--- /dev/null
+++ b/tests/tools/CMakeLists.txt
@@ -0,0 +1,8 @@
+set(EXCLUDE_DIR "")
+
+if(OBS_BUILD)
+  list(APPEND EXCLUDE_DIR tflite_benchmark_model)
+  list(APPEND EXCLUDE_DIR tflite_run)
+endif(OBS_BUILD)
+
+add_subdirectories(EXCLUDES ${EXCLUDE_DIR})
diff --git a/tests/tools/nnapi_test/CMakeLists.txt b/tests/tools/nnapi_test/CMakeLists.txt
new file mode 100644
index 000000000..b52f4f34b
--- /dev/null
+++ b/tests/tools/nnapi_test/CMakeLists.txt
@@ -0,0 +1,5 @@
+list(APPEND SOURCES "src/nnapi_test.cc")
+
+add_executable(nnapi_test ${SOURCES})
+target_link_libraries(nnapi_test nnfw_lib_tflite)
+install(TARGETS nnapi_test DESTINATION bin)
diff --git a/tests/tools/nnapi_test/src/nnapi_test.cc b/tests/tools/nnapi_test/src/nnapi_test.cc
new file mode 100644
index 000000000..73e80f01f
--- /dev/null
+++ b/tests/tools/nnapi_test/src/nnapi_test.cc
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tflite/ext/kernels/register.h"
+#include "tensorflow/contrib/lite/model.h"
+
+#include "tflite/interp/FlatBufferBuilder.h"
+#include "tflite/Diff.h"
+
+#include <iostream>
+#include <stdexcept>
+
+using namespace tflite;
+using namespace nnfw::tflite;
+
+int main(const int argc, char **argv)
+{
+  if (argc < 2)
+  {
+    std::cerr << "nnapi_test\n\n";
+    std::cerr << "Usage: " << argv[0] << " <.tflite>\n\n";
+    return 1;
+  }
+
+  const auto filename = argv[1];
+
+  StderrReporter error_reporter;
+
+  auto model = FlatBufferModel::BuildFromFile(filename, &error_reporter);
+  if (model == nullptr)
+  {
+    // Guard against a missing or unreadable model file before dereferencing it
+    std::cerr << "Cannot create model" << std::endl;
+    return 1;
+  }
+
+  const nnfw::tflite::FlatBufferBuilder builder(*model);
+
+  try
+  {
+    return RandomTestRunner::make(0).run(builder);
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << e.what() << std::endl;
+    return 1;
+  }
+}
diff --git a/tests/tools/tflite_benchmark/CMakeLists.txt b/tests/tools/tflite_benchmark/CMakeLists.txt
new file mode 100644
index 000000000..56421a294
--- /dev/null
+++ b/tests/tools/tflite_benchmark/CMakeLists.txt
@@ -0,0 +1,5 @@
+list(APPEND SOURCES "src/tflite_benchmark.cc")
+
+add_executable(tflite_benchmark ${SOURCES})
+target_link_libraries(tflite_benchmark nnfw_lib_tflite tensorflow-lite ${LIB_PTHREAD} dl nnfw_lib_misc)
+install(TARGETS tflite_benchmark DESTINATION bin)
diff --git a/tests/tools/tflite_benchmark/src/tflite_benchmark.cc b/tests/tools/tflite_benchmark/src/tflite_benchmark.cc
new file mode 100644
index 000000000..b77afc189
--- /dev/null
+++ b/tests/tools/tflite_benchmark/src/tflite_benchmark.cc
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "tflite/ext/kernels/register.h" +#include "tensorflow/contrib/lite/model.h" + +#include "tflite/Assert.h" +#include "tflite/Session.h" +#include "tflite/InterpreterSession.h" +#include "tflite/NNAPISession.h" +#include "tflite/Diff.h" +#include "misc/tensor/IndexIterator.h" + +#include <boost/accumulators/accumulators.hpp> +#include <boost/accumulators/statistics/stats.hpp> +#include <boost/accumulators/statistics/min.hpp> +#include <boost/accumulators/statistics/max.hpp> +#include <boost/accumulators/statistics/mean.hpp> + +#include <iostream> + +#include "misc/environment.h" +#include "misc/benchmark.h" + +using namespace tflite; +using namespace nnfw::tflite; + +void help(std::ostream &out, const int argc, char **argv) +{ + std::string cmd = argv[0]; + auto pos = cmd.find_last_of("/"); + if (pos != std::string::npos) + cmd = cmd.substr(pos + 1); + + out << "use:" << std::endl << cmd << " <model file name>" << std::endl; +} + +bool checkParams(const int argc, char **argv) +{ + if (argc < 2) + { + help(std::cerr, argc, argv); + return false; + } + return true; +} + +int main(const int argc, char **argv) +{ + + if (!checkParams(argc, argv)) + { + return -1; + } + + const auto filename = argv[1]; + + const bool use_nnapi = nnfw::misc::get_env_bool("USE_NNAPI"); + const auto thread_count = nnfw::misc::get_env_int("THREAD", -1); + + std::cout << "Num threads: " << thread_count << std::endl; + if (use_nnapi) + { + std::cout << "Use NNAPI" << std::endl; + } + + StderrReporter error_reporter; + + auto model = FlatBufferModel::BuildFromFile(filename, &error_reporter); + if (model == nullptr) + { + std::cerr << "Cannot create model" << std::endl; + return -1; + } + + BuiltinOpResolver resolver; + + InterpreterBuilder builder(*model, resolver); + + std::unique_ptr<Interpreter> interpreter; + + try + { + TFLITE_ENSURE(builder(&interpreter)); + } + catch (const std::exception &e) + { + std::cerr << e.what() << std::endl; + return 1; + } + + // Show inputs + for (uint32_t n = 0; n < interpreter->inputs().size(); ++n) + { + // TODO Print shape + auto tensor_id = interpreter->inputs().at(n); + auto tensor_ptr = interpreter->tensor(tensor_id); + + std::cout << "Input #" << n << ":" << std::endl; + std::cout << " Name: " << tensor_ptr->name << std::endl; + } + + // Show outputs + for (uint32_t n = 0; n < interpreter->outputs().size(); ++n) + { + // TODO Print shape + auto tensor_id = interpreter->outputs().at(n); + auto tensor_ptr = interpreter->tensor(tensor_id); + + std::cout << "Output #" << n << ":" << std::endl; + std::cout << " Name: " << tensor_ptr->name << std::endl; + } + + interpreter->SetNumThreads(thread_count); + + std::shared_ptr<nnfw::tflite::Session> sess; + + if (use_nnapi) + { + sess = std::make_shared<nnfw::tflite::NNAPISession>(interpreter.get()); + } + else + { + sess = std::make_shared<nnfw::tflite::InterpreterSession>(interpreter.get()); + } + + // + // Warming-up + // + for (uint32_t n = 0; n < 3; ++n) + { + std::chrono::milliseconds elapsed(0); + + sess->prepare(); + + for (const auto &id : interpreter->inputs()) + { + TfLiteTensor *tensor = 
interpreter->tensor(id);
+      if (tensor->type == kTfLiteInt32)
+      {
+        // Generate signed 32-bit integer (s32) input
+        auto tensor_view = nnfw::tflite::TensorView<int32_t>::make(*interpreter, id);
+
+        int32_t value = 0;
+
+        nnfw::misc::tensor::iterate(tensor_view.shape())
+            << [&](const nnfw::misc::tensor::Index &ind) {
+                 // TODO Generate random values
+                 // Gather operation: index should be within input coverage.
+                 tensor_view.at(ind) = value;
+                 value++;
+               };
+      }
+      else if (tensor->type == kTfLiteUInt8)
+      {
+        // Generate unsigned 8-bit integer input
+        auto tensor_view = nnfw::tflite::TensorView<uint8_t>::make(*interpreter, id);
+
+        uint8_t value = 0;
+
+        nnfw::misc::tensor::iterate(tensor_view.shape())
+            << [&](const nnfw::misc::tensor::Index &ind) {
+                 // TODO Generate random values
+                 tensor_view.at(ind) = value;
+                 value = (value + 1) & 0xFF;
+               };
+      }
+      else
+      {
+        assert(tensor->type == kTfLiteFloat32);
+
+        const int seed = 1; /* TODO Add an option for seed value */
+        RandomGenerator randgen{seed, 0.0f, 0.2f};
+        const float *end = reinterpret_cast<const float *>(tensor->data.raw_const + tensor->bytes);
+        for (float *ptr = tensor->data.f; ptr < end; ptr++)
+        {
+          *ptr = randgen.generate<float>();
+        }
+      }
+    }
+
+    nnfw::misc::benchmark::measure(elapsed) << [&](void) {
+      if (!sess->run())
+      {
+        assert(0 && "run failed");
+      }
+    };
+    sess->teardown();
+
+    std::cout << "Warming-up " << n << ": " << elapsed.count() << "ms" << std::endl;
+  }
+
+  //
+  // Measure
+  //
+  const auto cnt = nnfw::misc::get_env_int("COUNT", 1);
+
+  using namespace boost::accumulators;
+
+  accumulator_set<double, stats<tag::mean, tag::min, tag::max>> acc;
+
+  for (int n = 0; n < cnt; ++n)
+  {
+    std::chrono::milliseconds elapsed(0);
+
+    sess->prepare();
+    nnfw::misc::benchmark::measure(elapsed) << [&](void) {
+      if (!sess->run())
+      {
+        assert(0 && "run failed");
+      }
+    };
+    sess->teardown();
+
+    acc(elapsed.count());
+
+    std::cout << "Iteration " << n << ": " << elapsed.count() << "ms" << std::endl;
+  }
+
+  std::cout << "--------" << std::endl;
+  std::cout << "Min: " << min(acc) << "ms" << std::endl;
+  std::cout << "Max: " << max(acc) << "ms" << std::endl;
+  std::cout << "Mean: " << mean(acc) << "ms" << std::endl;
+
+  return 0;
+}
diff --git a/tests/tools/tflite_benchmark_model/.FORMATDENY b/tests/tools/tflite_benchmark_model/.FORMATDENY
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tests/tools/tflite_benchmark_model/.FORMATDENY
diff --git a/tests/tools/tflite_benchmark_model/CMakeLists.txt b/tests/tools/tflite_benchmark_model/CMakeLists.txt
new file mode 100644
index 000000000..c48f658c1
--- /dev/null
+++ b/tests/tools/tflite_benchmark_model/CMakeLists.txt
@@ -0,0 +1,21 @@
+if (NOT BUILD_TFLITE_BENCHMARK_MODEL)
+  return()
+endif(NOT BUILD_TFLITE_BENCHMARK_MODEL)
+
+file(GLOB_RECURSE SOURCES "*.cc")
+
+nnfw_find_package(TensorFlowSource REQUIRED)
+set(TENSORFLOW_LITE_BASE "${TensorFlowSource_DIR}/tensorflow/contrib/lite")
+list(APPEND SOURCES "${TENSORFLOW_LITE_BASE}/tools/benchmark/benchmark_main.cc"
+                    "${TENSORFLOW_LITE_BASE}/tools/benchmark/benchmark_model.cc"
+                    "${TENSORFLOW_LITE_BASE}/tools/benchmark/benchmark_params.cc"
+                    "${TENSORFLOW_LITE_BASE}/tools/benchmark/command_line_flags.cc")
+
+add_executable(tflite_benchmark_model ${SOURCES})
+target_compile_definitions(tflite_benchmark_model PUBLIC "TFLITE_PROFILING_ENABLED")
+target_link_libraries(tflite_benchmark_model nnfw_lib_misc nnfw_lib_tflite nnfw_lib_profiling)
+target_link_libraries(tflite_benchmark_model tensorflow-lite ${LIB_PTHREAD} dl)
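+# Note: TFLITE_PROFILING_ENABLED is defined PUBLIC above so that the vendored
+# benchmark sources are built with operator profiling compiled in (the
+# in-tree equivalent of bazel's --copt=-DTFLITE_PROFILING_ENABLED).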
+install(TARGETS tflite_benchmark_model DESTINATION bin)
diff --git a/tests/tools/tflite_benchmark_model/README.md b/tests/tools/tflite_benchmark_model/README.md
new file mode 100644
index 000000000..8d997639f
--- /dev/null
+++ b/tests/tools/tflite_benchmark_model/README.md
@@ -0,0 +1,197 @@
+# TFLite Model Benchmark Tool
+
+## Description
+
+A simple C++ binary to benchmark a TFLite model and its individual operators,
+both on desktop machines and on Android. The binary takes a TFLite model,
+generates random inputs, and then repeatedly runs the model for a specified
+number of runs. Aggregate latency statistics are reported after running the
+benchmark.
+
+The instructions below are for running the binary on desktop and Android;
+for iOS, please use the
+[iOS benchmark app](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tools/benchmark/ios).
+
+## Parameters
+
+The binary takes the following required parameters:
+
+* `graph`: `string` \
+    The path to the TFLite model file.
+
+and the following optional parameters:
+
+* `num_threads`: `int` (default=1) \
+    The number of threads to use for running the TFLite interpreter.
+* `warmup_runs`: `int` (default=1) \
+    The number of warmup runs to do before starting the benchmark.
+* `num_runs`: `int` (default=50) \
+    The number of runs. Increase this to reduce variance.
+* `run_delay`: `float` (default=-1.0) \
+    The delay in seconds between subsequent benchmark runs. Non-positive values
+    mean use no delay.
+* `use_nnapi`: `bool` (default=false) \
+    Whether to use [Android NNAPI](https://developer.android.com/ndk/guides/neuralnetworks/).
+    This API is available on recent Android devices.
+
+## To build/install/run
+
+### On Android:
+
+(0) Refer to https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android to edit the `WORKSPACE` to configure the Android NDK/SDK.
+
+(1) Build for your specific platform, e.g.:
+
+```
+bazel build -c opt \
+  --config=android_arm \
+  --cxxopt='--std=c++11' \
+  tensorflow/contrib/lite/tools/benchmark:benchmark_model
+```
+
+(2) Connect your phone. Push the binary to your phone with adb push
+    (make the directory if required):
+
+```
+adb push bazel-bin/tensorflow/contrib/lite/tools/benchmark/benchmark_model /data/local/tmp
+```
+
+(3) Make the binary executable.
+
+```
+adb shell chmod +x /data/local/tmp/benchmark_model
+```
+
+(4) Push the compute graph that you need to test. For example:
+
+```
+adb push mobilenet_quant_v1_224.tflite /data/local/tmp
+```
+
+(5) Run the benchmark. For example:
+
+```
+adb shell /data/local/tmp/benchmark_model \
+  --graph=/data/local/tmp/mobilenet_quant_v1_224.tflite \
+  --num_threads=4
+```
+
+### On desktop:
+(1) Build the binary.
+
+```
+bazel build -c opt tensorflow/contrib/lite/tools/benchmark:benchmark_model
+```
+
+(2) Run on your compute graph, similar to the Android case but without the need for `adb shell`.
+For example:
+
+```
+bazel-bin/tensorflow/contrib/lite/tools/benchmark/benchmark_model \
+  --graph=mobilenet_quant_v1_224.tflite \
+  --num_threads=4
+```
+
+The MobileNet graph used as an example here may be downloaded from [here](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip).
+
+
+## Reducing variance between runs on Android
+
+Most modern Android phones use the [ARM big.LITTLE](https://en.wikipedia.org/wiki/ARM_big.LITTLE)
+architecture, where some cores are more power-hungry but faster than other cores.
+When running benchmarks on these phones, there can be significant variance
+between different runs of the benchmark. One way to reduce this variance is to
+set the [CPU affinity](https://en.wikipedia.org/wiki/Processor_affinity)
+before running the benchmark. On Android this can be done using the `taskset`
+command.
+For example, to run the benchmark on the big cores of a Pixel 2 with a single
+thread, use the following command:
+
+```
+adb shell taskset f0 /data/local/tmp/benchmark_model \
+  --graph=/data/local/tmp/mobilenet_quant_v1_224.tflite \
+  --num_threads=1
+```
+
+where `f0` is the affinity mask for big cores on Pixel 2.
+Note: the affinity mask varies with the device.
+
+## Profiling model operators
+The benchmark binary also allows you to profile operators and reports the
+execution time of each operator. To do this, compile the binary with profiling
+support by passing **--copt=-DTFLITE_PROFILING_ENABLED**.
+For example, to compile with profiling support on Android, add this flag to the previous command:
+
+```
+bazel build -c opt \
+  --config=android_arm \
+  --cxxopt='--std=c++11' \
+  --copt=-DTFLITE_PROFILING_ENABLED \
+  tensorflow/contrib/lite/tools/benchmark:benchmark_model
+```
+This compiles TFLite with profiling enabled; you can then run the benchmark
+binary as before. It will produce detailed statistics for each operation,
+similar to those shown below:
+
+```
+
+============================== Run Order ==============================
+ [node type] [start] [first] [avg ms] [%] [cdf%] [mem KB] [times called] [Name]
+ CONV_2D 0.000 4.269 4.269 0.107% 0.107% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_0/Relu6]
+ DEPTHWISE_CONV_2D 4.270 2.150 2.150 0.054% 0.161% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_1_depthwise/Relu6]
+ CONV_2D 6.421 6.107 6.107 0.153% 0.314% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_1_pointwise/Relu6]
+ DEPTHWISE_CONV_2D 12.528 1.366 1.366 0.034% 0.348% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_2_depthwise/Relu6]
+ CONV_2D 13.895 4.195 4.195 0.105% 0.454% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_2_pointwise/Relu6]
+ DEPTHWISE_CONV_2D 18.091 1.260 1.260 0.032% 0.485% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_3_depthwise/Relu6]
+ CONV_2D 19.352 6.652 6.652 0.167% 0.652% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_3_pointwise/Relu6]
+ DEPTHWISE_CONV_2D 26.005 0.698 0.698 0.018% 0.670% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_4_depthwise/Relu6]
+ CONV_2D 26.703 3.344 3.344 0.084% 0.754% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_4_pointwise/Relu6]
+ DEPTHWISE_CONV_2D 30.047 0.646 0.646 0.016% 0.770% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_5_depthwise/Relu6]
+ CONV_2D 30.694 5.800 5.800 0.145% 0.915% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_5_pointwise/Relu6]
+ DEPTHWISE_CONV_2D 36.495 0.331 0.331 0.008% 0.924% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_6_depthwise/Relu6]
+ CONV_2D 36.826 2.838 2.838 0.071% 0.995% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_6_pointwise/Relu6]
+ DEPTHWISE_CONV_2D 39.665 0.439 0.439 0.011% 1.006% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_7_depthwise/Relu6]
+ CONV_2D 40.105 5.293 5.293 0.133% 1.139% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_7_pointwise/Relu6]
+ DEPTHWISE_CONV_2D 45.399 0.352 0.352 0.009% 1.147% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_8_depthwise/Relu6]
+ CONV_2D 45.752 5.322 5.322 0.133% 1.281% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_8_pointwise/Relu6]
+ DEPTHWISE_CONV_2D 51.075 0.357 0.357 0.009% 1.290% 0.000 0
[MobilenetV1/MobilenetV1/Conv2d_9_depthwise/Relu6] + CONV_2D 51.432 5.693 5.693 0.143% 1.433% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_9_pointwise/Relu6] + DEPTHWISE_CONV_2D 57.126 0.366 0.366 0.009% 1.442% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_10_depthwise/Relu6] + CONV_2D 57.493 5.472 5.472 0.137% 1.579% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_10_pointwise/Relu6] + DEPTHWISE_CONV_2D 62.966 0.364 0.364 0.009% 1.588% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_11_depthwise/Relu6] + CONV_2D 63.330 5.404 5.404 0.136% 1.724% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_11_pointwise/Relu6] + DEPTHWISE_CONV_2D 68.735 0.155 0.155 0.004% 1.728% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_12_depthwise/Relu6] + CONV_2D 68.891 2.970 2.970 0.074% 1.802% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_12_pointwise/Relu6] + DEPTHWISE_CONV_2D 71.862 0.206 0.206 0.005% 1.807% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_13_depthwise/Relu6] + CONV_2D 72.069 5.888 5.888 0.148% 1.955% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_13_pointwise/Relu6] + AVERAGE_POOL_2D 77.958 0.036 0.036 0.001% 1.956% 0.000 0 [MobilenetV1/Logits/AvgPool_1a/AvgPool] + CONV_2D 77.994 1.445 1.445 0.036% 1.992% 0.000 0 [MobilenetV1/Logits/Conv2d_1c_1x1/BiasAdd] + RESHAPE 79.440 0.002 0.002 0.000% 1.992% 0.000 0 [MobilenetV1/Predictions/Reshape] + SOFTMAX 79.443 0.029 0.029 0.001% 1.993% 0.000 0 [MobilenetV1/Predictions/Softmax] + +============================== Top by Computation Time ============================== + [node type] [start] [first] [avg ms] [%] [cdf%] [mem KB] [times called] [Name] + CONV_2D 19.352 6.652 6.652 0.167% 0.167% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_3_pointwise/Relu6] + CONV_2D 6.421 6.107 6.107 0.153% 0.320% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_1_pointwise/Relu6] + CONV_2D 72.069 5.888 5.888 0.148% 0.468% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_13_pointwise/Relu6] + CONV_2D 30.694 5.800 5.800 0.145% 0.613% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_5_pointwise/Relu6] + CONV_2D 51.432 5.693 5.693 0.143% 0.756% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_9_pointwise/Relu6] + CONV_2D 57.493 5.472 5.472 0.137% 0.893% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_10_pointwise/Relu6] + CONV_2D 63.330 5.404 5.404 0.136% 1.029% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_11_pointwise/Relu6] + CONV_2D 45.752 5.322 5.322 0.133% 1.162% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_8_pointwise/Relu6] + CONV_2D 40.105 5.293 5.293 0.133% 1.295% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_7_pointwise/Relu6] + CONV_2D 0.000 4.269 4.269 0.107% 1.402% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_0/Relu6] + +Number of nodes executed: 31 +============================== Summary by node type ============================== + [Node type] [count] [avg ms] [avg %] [cdf %] [mem KB] [times called] + CONV_2D 15 1.406 89.270% 89.270% 0.000 0 + DEPTHWISE_CONV_2D 13 0.169 10.730% 100.000% 0.000 0 + SOFTMAX 1 0.000 0.000% 100.000% 0.000 0 + RESHAPE 1 0.000 0.000% 100.000% 0.000 0 + AVERAGE_POOL_2D 1 0.000 0.000% 100.000% 0.000 0 + +Timings (microseconds): count=50 first=79449 curr=81350 min=77385 max=88213 avg=79732 std=1929 +Memory (bytes): count=0 +31 nodes observed + + +Average inference timings in us: Warmup: 83235, Init: 38467, no stats: 79760.9 +``` diff --git a/tests/tools/tflite_benchmark_model/benchmark_tflite_model.cc b/tests/tools/tflite_benchmark_model/benchmark_tflite_model.cc new file mode 100644 index 000000000..efc8bae52 --- /dev/null +++ b/tests/tools/tflite_benchmark_model/benchmark_tflite_model.cc @@ -0,0 +1,401 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h" + +#include <cstdarg> +#include <cstdlib> +#include <iostream> +#include <memory> +#include <string> +#include <unordered_set> +#include <vector> + +#ifdef TFLITE_FLEX +#include "tensorflow/contrib/lite/delegates/flex/delegate.h" +#endif // TFLITE_FLEX +#include "tflite/ext/kernels/register.h" +#include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/op_resolver.h" +#include "tensorflow/contrib/lite/string_util.h" +#include "tensorflow/contrib/lite/tools/benchmark/logging.h" + +// For profiling nnapi_delegate +#include "profiling/profiling.h" +#include "tflite/ext/nnapi_delegate.h" + +namespace { + nnfw::tflite::NNAPIDelegate nnfw_delegate_; +} + +#ifdef TFLITE_CUSTOM_OPS_HEADER +void RegisterSelectedOps(::tflite::MutableOpResolver* resolver); +#endif + +namespace tflite { +namespace benchmark { + +void ProfilingListener::SetInterpreter(tflite::Interpreter* interpreter) { + TFLITE_BENCHMARK_CHECK(interpreter); + interpreter_ = interpreter; + interpreter_->SetProfiler(&profiler_); +} + +void ProfilingListener::OnSingleRunStart(RunType run_type) { + if (run_type == REGULAR) { + profiler_.Reset(); + profiler_.StartProfiling(); + } +} + +void ProfilingListener::OnBenchmarkEnd(const BenchmarkResults& results) { + if (has_profiles_) { + TFLITE_LOG(INFO) << summarizer_.GetOutputString(); + } +} + +void ProfilingListener::OnSingleRunEnd() { + profiler_.StopProfiling(); + auto profile_events = profiler_.GetProfileEvents(); + has_profiles_ = !profile_events.empty(); + summarizer_.ProcessProfiles(profile_events, *interpreter_); +} + +namespace { + +std::vector<std::string> Split(const std::string& str, const char delim) { + std::istringstream input(str); + std::vector<std::string> results; + std::string item; + while (std::getline(input, item, delim)) { + results.push_back(item); + } + return results; +} + +template <typename T> +bool SplitAndParse(const std::string& str, char delim, std::vector<T>* values) { + std::istringstream input(str); + bool first = true; + while (!input.eof()) { + if (!first) { + char c; + input >> c; + if (c != delim) { + return false; + } + } else { + first = false; + } + T val; + input >> val; + if (!input.eof() && 
!input.good()) { + return false; + } + values->push_back(val); + } + return true; +} + +template <typename T> +void FillRandomValue(T* ptr, const std::vector<int>& sizes, + const std::function<T()>& random_func) { + int num_elements = 1; + for (int dim : sizes) { + num_elements *= dim; + } + for (int i = 0; i < num_elements; ++i) { + *ptr++ = random_func(); + } +} + +void FillRandomString(tflite::DynamicBuffer* buffer, + const std::vector<int>& sizes, + const std::function<string()>& random_func) { + int num_elements = 1; + for (int dim : sizes) { + num_elements *= dim; + } + for (int i = 0; i < num_elements; ++i) { + auto str = random_func(); + buffer->AddString(str.data(), str.length()); + } +} + +bool PopulateInputLayerInfo( + const string& names_string, const string& shapes_string, + std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info) { + std::vector<std::string> names = Split(names_string, ','); + std::vector<std::string> shapes = Split(shapes_string, ':'); + + if (names.size() != shapes.size()) { + TFLITE_LOG(ERROR) << "The number of items in" + << " --input_layer_shape (" << shapes_string << ", with " + << shapes.size() << " items)" + << " must match the number of items in" + << " --input_layer (" << names_string << ", with " + << names.size() << " items)." + << " For example --input_layer=input1,input2" + << " --input_layer_shape=1,224,224,4:1,20"; + return false; + } + + for (int i = 0; i < names.size(); ++i) { + info->push_back(BenchmarkTfLiteModel::InputLayerInfo()); + BenchmarkTfLiteModel::InputLayerInfo& input = info->back(); + + input.name = names[i]; + + TFLITE_BENCHMARK_CHECK(SplitAndParse(shapes[i], ',', &input.shape)) + << "Incorrect size string specified: " << shapes[i]; + for (int dim : input.shape) { + if (dim == -1) { + TFLITE_LOG(ERROR) + << "Any unknown sizes in the shapes (-1's) must be replaced" + << " with the size you want to benchmark with."; + return false; + } + } + } + + return true; +} + +BenchmarkParams GetDefaultParams() { + BenchmarkParams default_params = BenchmarkModel::DefaultParams(); + default_params.AddParam("graph", BenchmarkParam::Create<std::string>("")); + default_params.AddParam("input_layer", + BenchmarkParam::Create<std::string>("")); + default_params.AddParam("input_layer_shape", + BenchmarkParam::Create<std::string>("")); + default_params.AddParam("use_nnapi", BenchmarkParam::Create<bool>(false)); + return default_params; +} + +} // namespace + +BenchmarkTfLiteModel::BenchmarkTfLiteModel() + : BenchmarkModel(GetDefaultParams()) { + AddListener(&profiling_listener_); +} + +BenchmarkTfLiteModel::BenchmarkTfLiteModel(BenchmarkParams params) + : BenchmarkModel(std::move(params)) { + AddListener(&profiling_listener_); +} + +std::vector<Flag> BenchmarkTfLiteModel::GetFlags() { + std::vector<Flag> flags = BenchmarkTfLiteModel::BenchmarkModel::GetFlags(); + std::vector<Flag> specific_flags = { + CreateFlag<std::string>("graph", ¶ms_, "graph file name"), + CreateFlag<std::string>("input_layer", ¶ms_, "input layer names"), + CreateFlag<std::string>("input_layer_shape", ¶ms_, + "input layer shape"), + CreateFlag<bool>("use_nnapi", ¶ms_, "use nnapi api")}; + + flags.insert(flags.end(), specific_flags.begin(), specific_flags.end()); + return flags; +} + +void BenchmarkTfLiteModel::LogParams() { + BenchmarkModel::LogParams(); + TFLITE_LOG(INFO) << "Graph: [" << params_.Get<std::string>("graph") << "]"; + TFLITE_LOG(INFO) << "Input layers: [" + << params_.Get<std::string>("input_layer") << "]"; + TFLITE_LOG(INFO) << "Input shapes: [" + << 
params_.Get<std::string>("input_layer_shape") << "]"; + TFLITE_LOG(INFO) << "Use nnapi : [" << params_.Get<bool>("use_nnapi") << "]"; +} + +bool BenchmarkTfLiteModel::ValidateParams() { + if (params_.Get<std::string>("graph").empty()) { + TFLITE_LOG(ERROR) + << "Please specify the name of your TF Lite input file with --graph"; + return false; + } + return PopulateInputLayerInfo(params_.Get<std::string>("input_layer"), + params_.Get<std::string>("input_layer_shape"), + &inputs); +} + +uint64_t BenchmarkTfLiteModel::ComputeInputBytes() { + TFLITE_BENCHMARK_CHECK(interpreter); + uint64_t total_input_bytes = 0; + for (int input : interpreter->inputs()) { + auto* t = interpreter->tensor(input); + total_input_bytes += t->bytes; + } + return total_input_bytes; +} + +void BenchmarkTfLiteModel::PrepareInputsAndOutputs() { + auto interpreter_inputs = interpreter->inputs(); + // Set the values of the input tensors. + for (int j = 0; j < inputs.size(); ++j) { + const InputLayerInfo& input = inputs[j]; + int i = interpreter_inputs[j]; + TfLiteTensor* t = interpreter->tensor(i); + std::vector<int> sizes = input.shape; + + // TODO(ahentz): below we ignore the O-th dimension (number of batches). + if (t->type == kTfLiteFloat32) { + FillRandomValue<float>( + interpreter->typed_tensor<float>(i), + std::vector<int>(sizes.begin() + 1, sizes.end()), + []() { return static_cast<float>(rand()) / RAND_MAX - 0.5f; }); + } else if (t->type == kTfLiteInt32) { + // TODO(yunluli): This is currently only used for handling embedding input + // for speech models. Generalize if necessary. + FillRandomValue<int32_t>( + interpreter->typed_tensor<int32_t>(i), + std::vector<int32_t>(sizes.begin() + 1, sizes.end()), + []() { return static_cast<int32_t>(rand()) % 100; }); + } else if (t->type == kTfLiteUInt8) { + FillRandomValue<uint8_t>( + interpreter->typed_tensor<uint8_t>(i), + std::vector<int>(sizes.begin() + 1, sizes.end()), + []() { return static_cast<uint8_t>(rand()) % 255; }); + } else if (t->type == kTfLiteString) { + tflite::DynamicBuffer buffer; + FillRandomString(&buffer, sizes, []() { + return "we're have some friends over saturday to hang out in the yard"; + }); + buffer.WriteToTensor(interpreter->tensor(i)); + } else { + TFLITE_LOG(FATAL) << "Don't know how to populate tensor " << t->name + << " of type " << t->type; + } + } +} + +void BenchmarkTfLiteModel::Init() { + std::string graph = params_.Get<std::string>("graph"); + model = tflite::FlatBufferModel::BuildFromFile(graph.c_str()); + if (!model) { + TFLITE_LOG(FATAL) << "Failed to mmap model " << graph; + } + TFLITE_LOG(INFO) << "Loaded model " << graph; + model->error_reporter(); + TFLITE_LOG(INFO) << "resolved reporter"; + +#ifdef TFLITE_CUSTOM_OPS_HEADER + tflite::MutableOpResolver resolver; + RegisterSelectedOps(&resolver); +#else + nnfw::tflite::BuiltinOpResolver resolver; +#endif + + tflite::InterpreterBuilder(*model, resolver)(&interpreter); + if (!interpreter) { + TFLITE_LOG(FATAL) << "Failed to construct interpreter"; + } + profiling_listener_.SetInterpreter(interpreter.get()); + ::profiling::Context::get().setProfiler(interpreter->GetProfiler()); + + auto enable_sync = std::getenv("PROFILING_OP_SYNC"); + if (enable_sync && std::atoi(enable_sync) != 0) + { + ::profiling::Context::get().setSync(); + } + + const int32_t num_threads = params_.Get<int32_t>("num_threads"); + + if (num_threads != -1) { + interpreter->SetNumThreads(num_threads); + } + + bool use_nnapi = params_.Get<bool>("use_nnapi"); + + interpreter->UseNNAPI(use_nnapi); + + if (use_nnapi) 
{ + if (nnfw_delegate_.BuildGraph(interpreter.get()) != kTfLiteOk) { + TFLITE_LOG(FATAL) << "Failed to BuildGraph!"; + } + } + +#ifdef TFLITE_FLEX + TFLITE_LOG(INFO) << "Instantiating Flex Delegate"; + delegate_ = FlexDelegate::Create(); + if (delegate_) { + interpreter->ModifyGraphWithDelegate(delegate_.get(), + /*allow_dynamic_tensors=*/true); + } +#endif // TFLITE_FLEX + + auto interpreter_inputs = interpreter->inputs(); + + if (!inputs.empty()) { + TFLITE_BENCHMARK_CHECK_EQ(inputs.size(), interpreter_inputs.size()) + << "Inputs mismatch: Model inputs #:" << interpreter_inputs.size() + << " expected: " << inputs.size(); + } + + // TFLITE_BENCHMARK_CHECK that all names and types match + for (int j = 0; j < inputs.size(); ++j) { + const InputLayerInfo& input = inputs[j]; + int i = interpreter_inputs[j]; + TfLiteTensor* t = interpreter->tensor(i); + TFLITE_BENCHMARK_CHECK_EQ(t->name, input.name) + << "Tensor # " << i << " is named " << t->name << " but flags call it " + << input.name; + } + + // Resize all non-string tensors. + for (int j = 0; j < inputs.size(); ++j) { + const InputLayerInfo& input = inputs[j]; + int i = interpreter_inputs[j]; + TfLiteTensor* t = interpreter->tensor(i); + if (t->type != kTfLiteString) { + interpreter->ResizeInputTensor(i, input.shape); + } + } + + if (interpreter->AllocateTensors() != kTfLiteOk) { + TFLITE_LOG(FATAL) << "Failed to allocate tensors!"; + } +} + +void BenchmarkTfLiteModel::RunImpl() { + bool use_nnapi = params_.Get<bool>("use_nnapi"); + if (use_nnapi) { + if (nnfw_delegate_.Invoke(interpreter.get()) != kTfLiteOk) { + TFLITE_LOG(FATAL) << "Failed to invoke!"; + } + } else { + if (interpreter->Invoke() != kTfLiteOk) { + TFLITE_LOG(FATAL) << "Failed to invoke!"; + } + } +} + +} // namespace benchmark +} // namespace tflite diff --git a/tests/tools/tflite_benchmark_model/profile_summarizer.cc b/tests/tools/tflite_benchmark_model/profile_summarizer.cc new file mode 100644 index 000000000..ce19b0c98 --- /dev/null +++ b/tests/tools/tflite_benchmark_model/profile_summarizer.cc @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/contrib/lite/profiling/profile_summarizer.h" + +#include <sstream> + +#include "tensorflow/contrib/lite/schema/schema_generated.h" + +namespace tflite { +namespace profiling { +namespace { + +struct OperatorDetails { + std::string name; + std::vector<std::string> inputs; + std::vector<std::string> outputs; +}; + +std::string GetTensorName(const tflite::Interpreter& interpreter, + int tensor_index) { + const auto tensor = interpreter.tensor(tensor_index); + if (tensor == nullptr || tensor->name == nullptr) { + return "Unknown"; + } + return tensor->name; +} +std::vector<std::string> GetTensorNames(const tflite::Interpreter& interpreter, + const TfLiteIntArray* tensor_indices) { + std::vector<std::string> tensors; + tensors.reserve(tensor_indices->size); + for (int i = 0; i < tensor_indices->size; i++) { + tensors.push_back(GetTensorName(interpreter, tensor_indices->data[i])); + } + return tensors; +} + +std::string ToString(const std::vector<std::string>& str_vector) { + std::stringstream stream; + stream << "["; + bool first = true; + for (const auto& s : str_vector) { + if (!first) { + stream << ", "; + } else { + first = false; + } + stream << s; + } + stream << "]"; + return stream.str(); +} + +OperatorDetails GetOperatorDetails(const tflite::Interpreter& interpreter, + int node_index) { + auto node_reg = interpreter.node_and_registration(node_index); + auto inputs = node_reg->first.inputs; + auto outputs = node_reg->first.outputs; + int code = node_reg->second.builtin_code; + const char* op_name = nullptr; + if (code == tflite::BuiltinOperator_CUSTOM) { + const char* custom_name = node_reg->second.custom_name; + op_name = custom_name ? custom_name : "UnknownCustomOp"; + } else { + op_name = tflite::EnumNamesBuiltinOperator()[code]; + } + const char* profiling_string = + interpreter.OpProfilingString(node_reg->second, &node_reg->first); + OperatorDetails details; + details.name = op_name; + if (profiling_string) { + details.name += ":" + std::string(profiling_string); + } + details.inputs = GetTensorNames(interpreter, inputs); + details.outputs = GetTensorNames(interpreter, outputs); + return details; +} + +tensorflow::StatSummarizerOptions GetProfileSummarizerOptions() { + auto options = tensorflow::StatSummarizerOptions(); + options.show_summary = true; + options.show_memory = false; + return options; +} + +} // namespace + +ProfileSummarizer::ProfileSummarizer() + : stats_calculator_( + new ::tensorflow::StatsCalculator(GetProfileSummarizerOptions())) {} + +void ProfileSummarizer::ProcessProfiles( + const std::vector<const ProfileEvent*>& profile_stats, + const tflite::Interpreter& interpreter) { + std::vector<const ProfileEvent*> events; + std::copy_if(profile_stats.begin(), profile_stats.end(), + std::back_inserter(events), [](const ProfileEvent* e) { + return e->event_type == + ProfileEvent::EventType::OPERATOR_INVOKE_EVENT && + e->end_timestamp_us >= e->begin_timestamp_us; + }); + // Sort with begin_time. 
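+  // Repeated executions of the same node are given an occurrence suffix
+  // (#1, #2, ...) below so that each invocation gets its own stats entry.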
+ std::sort(events.begin(), events.end(), + [](const ProfileEvent* const& a, const ProfileEvent* const& b) { + return a->begin_timestamp_us < b->begin_timestamp_us; + }); + if (events.empty()) { + return; + } + + int64_t base_start_us = events[0]->begin_timestamp_us; + int node_num = 0; + int64_t curr_total_us = 0; + int prev_op_idx = -1; + int child_op_no = 1; + for (auto event : events) { + auto op_details = GetOperatorDetails(interpreter, event->event_metadata); + bool from_same_op = (prev_op_idx == event->event_metadata); + child_op_no = from_same_op ? child_op_no + 1 : 1; + auto node_name = ToString(op_details.outputs) + "#" + std::to_string(child_op_no); + int64_t start_us = event->begin_timestamp_us - base_start_us; + int64_t node_exec_time = + event->end_timestamp_us - event->begin_timestamp_us; + stats_calculator_->AddNodeStats(node_name, op_details.name, node_num, + start_us, node_exec_time, 0 /*memory */); + curr_total_us += node_exec_time; + ++node_num; + prev_op_idx = event->event_metadata; + } + stats_calculator_->UpdateRunTotalUs(curr_total_us); +} +} // namespace profiling +} // namespace tflite diff --git a/tests/tools/tflite_benchmark_model/stats_calculator.cc b/tests/tools/tflite_benchmark_model/stats_calculator.cc new file mode 100644 index 000000000..578650701 --- /dev/null +++ b/tests/tools/tflite_benchmark_model/stats_calculator.cc @@ -0,0 +1,317 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/util/stats_calculator.h" + +#include <iomanip> +#include <map> +#include <queue> +#include <sstream> +#include <string> +#include <algorithm> + +namespace tensorflow { + +StatsCalculator::StatsCalculator(const StatSummarizerOptions& options) + : options_(options) {} + +std::string StatsCalculator::GetShortSummary() const { + std::stringstream stream; + stream << "Timings (microseconds): "; + run_total_us_.OutputToStream(&stream); + stream << std::endl; + + stream << "Memory (bytes): "; + memory_.OutputToStream(&stream); + stream << std::endl; + + stream << details_.size() << " nodes observed" << std::endl; + return stream.str(); +} + +std::ostream& InitField(std::ostream& stream, int width) { + stream << "\t" << std::right << std::setw(width) << std::fixed + << std::setprecision(3); + return stream; +} + +std::string StatsCalculator::HeaderString(const std::string& title) const { + std::stringstream stream; + + stream << "============================== " << title + << " ==============================" << std::endl; + + InitField(stream, 24) << "[node type]"; + InitField(stream, 9) << "[start]"; + InitField(stream, 9) << "[first]"; + InitField(stream, 9) << "[avg ms]"; + InitField(stream, 8) << "[%]"; + InitField(stream, 8) << "[cdf%]"; + InitField(stream, 10) << "[mem KB]"; + InitField(stream, 9) << "[times called]"; + stream << "\t" + << "[Name]"; + return stream.str(); +} + +std::string StatsCalculator::ColumnString(const Detail& detail, + const int64_t cumulative_stat_on_node, + const Stat<int64_t>& stat) const { + const double start_ms = detail.start_us.avg() / 1000.0; + const double first_time_ms = detail.rel_end_us.first() / 1000.0; + const double avg_time_ms = detail.rel_end_us.avg() / 1000.0; + const double percentage = detail.rel_end_us.sum() * 100.0 / stat.sum(); + const double cdf_percentage = (cumulative_stat_on_node * 100.0f) / stat.sum(); + const int64_t times_called = detail.times_called / num_runs(); + + std::stringstream stream; + InitField(stream, 24) << detail.type; + InitField(stream, 9) << start_ms; + InitField(stream, 9) << first_time_ms; + InitField(stream, 9) << avg_time_ms; + InitField(stream, 7) << percentage << "%"; + InitField(stream, 7) << cdf_percentage << "%"; + InitField(stream, 10) << detail.mem_used.newest() / 1000.0; + InitField(stream, 9) << times_called; + stream << "\t" << detail.name; + + return stream.str(); +} + +void StatsCalculator::OrderNodesByMetric( + SortingMetric metric, std::vector<const Detail*>* details) const { + std::priority_queue<std::pair<std::string, const Detail*>> sorted_list; + const int num_nodes = details_.size(); + + for (const auto& det : details_) { + const Detail* detail = &(det.second); + std::stringstream stream; + stream << std::setw(20) << std::right << std::setprecision(10) + << std::fixed; + + switch (metric) { + case BY_NAME: + stream << detail->name; + break; + case BY_RUN_ORDER: + stream << num_nodes - detail->run_order; + break; + case BY_TIME: + stream << detail->rel_end_us.avg(); + break; + case BY_MEMORY: + stream << detail->mem_used.avg(); + break; + case BY_TYPE: + stream << detail->type; + break; + default: + stream << ""; + break; + } + + sorted_list.emplace(stream.str(), detail); + } + + while (!sorted_list.empty()) { + auto entry = sorted_list.top(); + sorted_list.pop(); + details->push_back(entry.second); + } +} + +void StatsCalculator::ComputeStatsByType( + std::map<std::string, int64_t>* 
node_type_map_count, + std::map<std::string, int64_t>* node_type_map_time, + std::map<std::string, int64_t>* node_type_map_memory, + std::map<std::string, int64_t>* node_type_map_times_called, + int64_t* accumulated_us) const { + int64_t run_count = run_total_us_.count(); + + for (const auto& det : details_) { + const std::string node_name = det.first; + const Detail& detail = det.second; + + int64_t curr_time_val = + static_cast<int64_t>(detail.rel_end_us.sum() / run_count); + *accumulated_us += curr_time_val; + + int64_t curr_memory_val = detail.mem_used.newest(); + + const std::string& node_type = detail.type; + + const std::string sharp1("#1"); + bool first = std::mismatch(sharp1.rbegin(), sharp1.rend(), node_name.rbegin()).first == sharp1.rend(); + + if (first) { + (*node_type_map_count)[node_type] += 1; + (*node_type_map_times_called)[node_type] += detail.times_called / run_count; + } + (*node_type_map_time)[node_type] += curr_time_val; + (*node_type_map_memory)[node_type] += curr_memory_val; + } +} + +std::string StatsCalculator::GetStatsByNodeType() const { + std::stringstream stream; + + stream << "Number of nodes executed: " << details_.size() << std::endl; + + stream << "============================== Summary by node type " + "==============================" + << std::endl; + + std::map<std::string, int64_t> node_type_map_count; + std::map<std::string, int64_t> node_type_map_time; + std::map<std::string, int64_t> node_type_map_memory; + std::map<std::string, int64_t> node_type_map_times_called; + int64_t accumulated_us = 0; + + ComputeStatsByType(&node_type_map_count, &node_type_map_time, + &node_type_map_memory, &node_type_map_times_called, + &accumulated_us); + + // Sort them. + std::priority_queue<std::pair<int64_t, std::pair<std::string, int64_t>>> + timings; + for (const auto& node_type : node_type_map_time) { + const int64_t mem_used = node_type_map_memory[node_type.first]; + timings.emplace(node_type.second, + std::pair<std::string, int64_t>(node_type.first, mem_used)); + } + + InitField(stream, 24) << "[Node type]"; + InitField(stream, 9) << "[count]"; + InitField(stream, 10) << "[avg ms]"; + InitField(stream, 11) << "[avg %]"; + InitField(stream, 11) << "[cdf %]"; + InitField(stream, 10) << "[mem KB]"; + InitField(stream, 10) << "[times called]"; + stream << std::endl; + + float cdf = 0.0f; + while (!timings.empty()) { + auto entry = timings.top(); + timings.pop(); + + const std::string node_type = entry.second.first; + const float memory = entry.second.second / 1000.0f; + + const int64_t node_type_total_us = entry.first; + const float time_per_run_ms = node_type_total_us / 1000.0f; + + const float percentage = + ((entry.first / static_cast<float>(accumulated_us)) * 100.0f); + cdf += percentage; + + InitField(stream, 24) << node_type; + InitField(stream, 9) << node_type_map_count[node_type]; + InitField(stream, 10) << time_per_run_ms; + InitField(stream, 10) << percentage << "%"; + InitField(stream, 10) << cdf << "%"; + InitField(stream, 10) << memory; + InitField(stream, 9) << node_type_map_times_called[node_type]; + stream << std::endl; + } + stream << std::endl; + return stream.str(); +} + +std::string StatsCalculator::GetStatsByMetric(const std::string& title, + SortingMetric sorting_metric, + int num_stats) const { + std::vector<const Detail*> details; + OrderNodesByMetric(sorting_metric, &details); + + double cumulative_stat_on_node = 0; + + std::stringstream stream; + stream << HeaderString(title) << std::endl; + int stat_num = 0; + for (auto detail : details) { 
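+    // Emit at most num_stats rows; num_stats <= 0 means no limit.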
+    ++stat_num;
+    if (num_stats > 0 && stat_num > num_stats) {
+      break;
+    }
+
+    // TODO(andrewharp): Make this keep track of the particular metric for cdf.
+    cumulative_stat_on_node += detail->rel_end_us.sum();
+    stream << ColumnString(*detail, cumulative_stat_on_node, run_total_us_)
+           << std::endl;
+  }
+  stream << std::endl;
+  return stream.str();
+}
+
+std::string StatsCalculator::GetOutputString() const {
+  std::stringstream stream;
+  if (options_.show_run_order) {
+    stream << GetStatsByMetric("Run Order", BY_RUN_ORDER,
+                               options_.run_order_limit);
+  }
+  if (options_.show_time) {
+    stream << GetStatsByMetric("Top by Computation Time", BY_TIME,
+                               options_.time_limit);
+  }
+  if (options_.show_memory) {
+    stream << GetStatsByMetric("Top by Memory Use", BY_MEMORY,
+                               options_.memory_limit);
+  }
+  if (options_.show_type) {
+    stream << GetStatsByNodeType();
+  }
+  if (options_.show_summary) {
+    stream << GetShortSummary() << std::endl;
+  }
+  return stream.str();
+}
+
+void StatsCalculator::AddNodeStats(const std::string& name,
+                                   const std::string& type, int64_t run_order,
+                                   int64_t start_us, int64_t rel_end_us,
+                                   int64_t mem_used) {
+  Detail* detail = nullptr;
+  if (details_.find(name) == details_.end()) {
+    details_.insert({name, {}});
+    detail = &details_.at(name);
+    detail->type = type;
+    detail->name = name;
+    detail->run_order = run_order;
+  } else {
+    detail = &details_.at(name);
+  }
+  detail->start_us.UpdateStat(start_us);
+  detail->rel_end_us.UpdateStat(rel_end_us);
+  detail->mem_used.UpdateStat(mem_used);
+  detail->times_called++;
+}
+
+}  // namespace tensorflow
diff --git a/tests/tools/tflite_run/CMakeLists.txt b/tests/tools/tflite_run/CMakeLists.txt
new file mode 100644
index 000000000..49d87318f
--- /dev/null
+++ b/tests/tools/tflite_run/CMakeLists.txt
@@ -0,0 +1,26 @@
+list(APPEND TFLITE_RUN_SRCS "src/tflite_run.cc")
+list(APPEND TFLITE_RUN_SRCS "src/bin_image.cc")
+list(APPEND TFLITE_RUN_SRCS "src/args.cc")
+list(APPEND TFLITE_RUN_SRCS "src/tensor_dumper.cc")
+list(APPEND TFLITE_RUN_SRCS "src/tensor_loader.cc")
+
+add_executable(tflite_run ${TFLITE_RUN_SRCS})
+target_include_directories(tflite_run PRIVATE src)
+target_link_libraries(tflite_run tensorflow-lite ${LIB_PTHREAD} dl nnfw_lib_tflite)
+target_link_libraries(tflite_run boost_program_options boost_system boost_filesystem)
+
+install(TARGETS tflite_run DESTINATION bin)
+
+# TEST BUILD
+nnfw_find_package(GTest)
+
+if(NOT GTest_FOUND)
+  return()
+endif(NOT GTest_FOUND)
+
+## Add test cpp file
+add_executable(tflite_test src/tflite_test.cc)
+## Link test executable against gtest & gtest_main
+target_link_libraries(tflite_test gtest gtest_main ${LIB_PTHREAD})
+## install test binary for packaging
+install(TARGETS tflite_test DESTINATION unittest)
diff --git a/tests/tools/tflite_run/README.md b/tests/tools/tflite_run/README.md
new file mode 100644
index 000000000..35d2b6497
--- /dev/null
+++ b/tests/tools/tflite_run/README.md
@@ -0,0 +1,91 @@
+# tflite_run
+
+A simple TensorFlow Lite runner. It measures the elapsed time and can
+optionally dump the input/output tensors or verify them.
+
+## Usage
+
+### Simple run
+
+This runs the model with random input data:
+
+```
+$ ./tflite_run model.tflite
+```
+
+Output would look like:
+
+```
+input tensor indices = [0,]
+Input image size is smaller than the size required by the model. Input will not be set.
+output tensor indices = [308(max:984),]
+Prepare takes 0.00126718 seconds
+Invoke takes 7.09527 seconds
+```
+
+### Specifying input feature map
+
+An input feature map can be specified, but only as preprocessed binary data,
+which means that image files must be converted beforehand.
+
+TODO: Add input image preprocessing instructions
+
+```
+$ ./tflite_run model.tflite -i binary_input_file
+```
+
+### Dump the input and output tensors
+
+Dump the input and output tensors to a file:
+```
+$ ./tflite_run model.tflite --dump golden
+```
+
+This is usually done for later verification. Here the tensors are written to a
+file named "golden".
+
+### Compare with the saved outputs
+
+The results from `tflite_run` are compared against a saved binary file with the
+`--compare` option.
+
+```
+$ ls golden
+golden
+$ ./tflite_run model.tflite --compare golden
+```
+
+The output would look like:
+
+```
+input tensor indices = [0,]
+Input image size is smaller than the size required by the model. Input will not be set.
+output tensor indices = [308(max:984),]
+Prepare takes 0.00126718 seconds
+Invoke takes 7.09527 seconds
+========================================
+Comparing the results with "golden".
+========================================
+  Tensor #308: UNMATCHED
+    1 diffs are detected
+    Max absolute diff at [0, 0]
+       expected: 99
+       obtained: 0.000139008
+       absolute diff: 98.9999
+    Max relative diff at [0, 1007]
+       expected: 7.01825e-33
+       obtained: 0.000139011
+       relative diff: 1
+         (tolerance level = 8.38861e+06)
+```
+
+If the `--compare` option is given, the exit code depends on the comparison
+result: 0 if the outputs match, non-zero otherwise.
+
+## How Verification Works
+
+For verification, we may follow these steps:
+
+1. Generate and store the verification data (run with option `--dump`)
+   1. Input tensors do not matter as we will keep inputs along with outputs
+   1. Interpreter.Invoke()
+   1. Dump input tensors and output tensors to a file
+1. Feed the dumped file to the other runtime that we want to verify (run with option `--compare`)
+   1. Set the interpreter's inputs to the input tensor data from the file
+   1. Interpreter.Invoke()
+   1. Compare the results with the output tensor data from the file
diff --git a/tests/tools/tflite_run/src/args.cc b/tests/tools/tflite_run/src/args.cc
new file mode 100644
index 000000000..713a0a9d2
--- /dev/null
+++ b/tests/tools/tflite_run/src/args.cc
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "args.h" + +#include <iostream> + +#include <boost/filesystem.hpp> + +namespace TFLiteRun +{ + +Args::Args(const int argc, char **argv) +{ + Initialize(); + Parse(argc, argv); +} + +void Args::Initialize(void) +{ + + // General options + po::options_description general("General options"); + + // clang-format off + general.add_options() + ("help,h", "Display available options") + ("dump,d", po::value<std::string>()->default_value(""), "Output filename") + ("compare,c", po::value<std::string>()->default_value(""), "filename to be compared with") + ("tflite", po::value<std::string>()->required()); + // clang-format on + + _options.add(general); + _positional.add("tflite", 1); +} + +void Args::Parse(const int argc, char **argv) +{ + po::variables_map vm; + po::store(po::command_line_parser(argc, argv).options(_options).positional(_positional).run(), + vm); + po::notify(vm); + +#if 0 // Enable this when we have mutually conflicting options + { + auto conflicting_options = [&](const std::string &o1, const std::string &o2) { + if ((vm.count(o1) && !vm[o1].defaulted()) && (vm.count(o2) && !vm[o2].defaulted())) + { + throw boost::program_options::error(std::string("Two options '") + o1 + "' and '" + o2 + + "' cannot be given at once."); + } + }; + + conflicting_options("input", "compare"); + } +#endif + + if (vm.count("help")) + { + std::cout << "tflite_run\n\n"; + std::cout << "Usage: " << argv[0] << " <.tflite> [<options>]\n\n"; + std::cout << _options; + std::cout << "\n"; + + exit(0); + } + + if (vm.count("dump")) + { + _dump_filename = vm["dump"].as<std::string>(); + } + + if (vm.count("compare")) + { + _compare_filename = vm["compare"].as<std::string>(); + } + + if (vm.count("tflite")) + { + _tflite_filename = vm["tflite"].as<std::string>(); + + if (_tflite_filename.empty()) + { + // TODO Print usage instead of the below message + std::cerr << "Please specify tflite file. Run with `--help` for usage." + << "\n"; + + exit(1); + } + else + { + if (!boost::filesystem::exists(_tflite_filename)) + { + std::cerr << "tflite file not found: " << _tflite_filename << "\n"; + } + } + } +} + +} // end of namespace TFLiteRun diff --git a/tests/tools/tflite_run/src/args.h b/tests/tools/tflite_run/src/args.h new file mode 100644 index 000000000..5561544eb --- /dev/null +++ b/tests/tools/tflite_run/src/args.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
diff --git a/tests/tools/tflite_run/src/args.cc b/tests/tools/tflite_run/src/args.cc
new file mode 100644
index 000000000..713a0a9d2
--- /dev/null
+++ b/tests/tools/tflite_run/src/args.cc
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "args.h"
+
+#include <cstdlib>
+#include <iostream>
+
+#include <boost/filesystem.hpp>
+
+namespace TFLiteRun
+{
+
+Args::Args(const int argc, char **argv)
+{
+  Initialize();
+  Parse(argc, argv);
+}
+
+void Args::Initialize(void)
+{
+  // General options
+  po::options_description general("General options");
+
+  // clang-format off
+  general.add_options()
+    ("help,h", "Display available options")
+    ("dump,d", po::value<std::string>()->default_value(""), "Output filename")
+    ("compare,c", po::value<std::string>()->default_value(""), "Filename to be compared with")
+    ("tflite", po::value<std::string>()->required());
+  // clang-format on
+
+  _options.add(general);
+  _positional.add("tflite", 1);
+}
+
+void Args::Parse(const int argc, char **argv)
+{
+  po::variables_map vm;
+  po::store(po::command_line_parser(argc, argv).options(_options).positional(_positional).run(),
+            vm);
+
+  // Handle `--help` before po::notify() so that help is printed even when
+  // the required `tflite` argument is missing
+  if (vm.count("help"))
+  {
+    std::cout << "tflite_run\n\n";
+    std::cout << "Usage: " << argv[0] << " <.tflite> [<options>]\n\n";
+    std::cout << _options;
+    std::cout << "\n";
+
+    exit(0);
+  }
+
+  po::notify(vm);
+
+#if 0 // Enable this when we have mutually conflicting options
+  {
+    auto conflicting_options = [&](const std::string &o1, const std::string &o2) {
+      if ((vm.count(o1) && !vm[o1].defaulted()) && (vm.count(o2) && !vm[o2].defaulted()))
+      {
+        throw boost::program_options::error(std::string("Two options '") + o1 + "' and '" + o2 +
+                                            "' cannot be given at once.");
+      }
+    };
+
+    conflicting_options("input", "compare");
+  }
+#endif
+
+  if (vm.count("dump"))
+  {
+    _dump_filename = vm["dump"].as<std::string>();
+  }
+
+  if (vm.count("compare"))
+  {
+    _compare_filename = vm["compare"].as<std::string>();
+  }
+
+  if (vm.count("tflite"))
+  {
+    _tflite_filename = vm["tflite"].as<std::string>();
+
+    if (_tflite_filename.empty())
+    {
+      // TODO Print usage instead of the below message
+      std::cerr << "Please specify tflite file. Run with `--help` for usage."
+                << "\n";
+
+      exit(1);
+    }
+    else
+    {
+      if (!boost::filesystem::exists(_tflite_filename))
+      {
+        std::cerr << "tflite file not found: " << _tflite_filename << "\n";
+        // The model file is required to proceed, so exit here as well
+        exit(1);
+      }
+    }
+  }
+}
+
+} // end of namespace TFLiteRun
diff --git a/tests/tools/tflite_run/src/args.h b/tests/tools/tflite_run/src/args.h
new file mode 100644
index 000000000..5561544eb
--- /dev/null
+++ b/tests/tools/tflite_run/src/args.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_RUN_ARGS_H__
+#define __TFLITE_RUN_ARGS_H__
+
+#include <string>
+#include <boost/program_options.hpp>
+
+namespace po = boost::program_options;
+
+namespace TFLiteRun
+{
+
+class Args
+{
+public:
+  Args(const int argc, char **argv);
+  void print(void);
+
+  const std::string &getTFLiteFilename(void) const { return _tflite_filename; }
+  const std::string &getDumpFilename(void) const { return _dump_filename; }
+  const std::string &getCompareFilename(void) const { return _compare_filename; }
+
+private:
+  void Initialize();
+  void Parse(const int argc, char **argv);
+
+private:
+  po::positional_options_description _positional;
+  po::options_description _options;
+
+  std::string _tflite_filename;
+  std::string _dump_filename;
+  std::string _compare_filename;
+};
+
+} // end of namespace TFLiteRun
+
+#endif // __TFLITE_RUN_ARGS_H__
diff --git a/tests/tools/tflite_run/src/bin_image.cc b/tests/tools/tflite_run/src/bin_image.cc
new file mode 100644
index 000000000..16d4c94f7
--- /dev/null
+++ b/tests/tools/tflite_run/src/bin_image.cc
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+#include <fstream>
+
+#include "bin_image.h"
+
+BinImage::BinImage(unsigned int width, unsigned int height, unsigned int channels)
+    : _width(width), _height(height), _channels(channels)
+{
+}
+
+BinImage::~BinImage() {}
+
+void BinImage::loadImage(const std::string &filename)
+{
+  std::ifstream fin(filename);
+
+  if (!fin)
+  {
+    std::cerr << "Cannot open image file: " << filename << ". "
+              << "Input image will not be set." << std::endl;
+    return;
+  }
+
+  _image.reserve(_width * _height * _channels);
+
+  // Assumption: the binary image is stored in [H, W, C] order
+  for (unsigned int i = 0; i < _width * _height * _channels; ++i)
+    _image.push_back(fin.get());
+}
+
+void BinImage::AssignTensor(TfLiteTensor *t)
+{
+  float *p = t->data.f;
+  const int IMAGE_MEAN = 128;
+  const float IMAGE_STD = 128.0f;
+
+  // to prevent runtime exception
+  if (_image.size() < _width * _height * _channels)
+  {
+    std::cerr << "Input image size is smaller than the size required by the model."
+              << " Input will not be set." << std::endl;
+    return;
+  }
+
+  // Iterate in [H, W, C] order so the sequential writes match the order the
+  // image was loaded in (the previous loops iterated width-first, which
+  // transposed the spatial dimensions)
+  for (unsigned int y = 0; y < _height; ++y)
+  {
+    for (unsigned int x = 0; x < _width; ++x)
+    {
+      for (unsigned int c = 0; c < _channels; ++c)
+      {
+        *p++ = (_image[y * _width * _channels + x * _channels + c] - IMAGE_MEAN) / IMAGE_STD;
+      }
+    }
+  }
+}
diff --git a/tests/tools/tflite_run/src/bin_image.h b/tests/tools/tflite_run/src/bin_image.h
new file mode 100644
index 000000000..845011be6
--- /dev/null
+++ b/tests/tools/tflite_run/src/bin_image.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_RUN_BIN_IMAGE_H__
+#define __TFLITE_RUN_BIN_IMAGE_H__
+
+#include <string>
+#include <vector>
+
+#include "tensorflow/contrib/lite/context.h"
+
+class BinImage
+{
+public:
+  BinImage(unsigned int width, unsigned int height, unsigned int channels);
+  ~BinImage();
+
+  void loadImage(const std::string &filename);
+
+  void AssignTensor(TfLiteTensor *t);
+
+private:
+  unsigned int _width;
+  unsigned int _height;
+  unsigned int _channels;
+
+  std::vector<unsigned char> _image;
+};
+
+#endif // __TFLITE_RUN_BIN_IMAGE_H__
diff --git a/tests/tools/tflite_run/src/tensor_dumper.cc b/tests/tools/tflite_run/src/tensor_dumper.cc
new file mode 100644
index 000000000..8568c9b67
--- /dev/null
+++ b/tests/tools/tflite_run/src/tensor_dumper.cc
@@ -0,0 +1,54 @@
+#include "tensor_dumper.h"
+
+#include <fstream>
+#include <iostream>
+#include <cstring>
+
+#include "tensorflow/contrib/lite/interpreter.h"
+
+namespace TFLiteRun
+{
+
+TensorDumper::TensorDumper()
+{
+  // DO NOTHING
+}
+
+void TensorDumper::addTensors(tflite::Interpreter &interpreter, const std::vector<int> &indices)
+{
+  for (const auto &o : indices)
+  {
+    // Snapshot the tensor's current contents
+    const TfLiteTensor *tensor = interpreter.tensor(o);
+    size_t size = tensor->bytes;
+    std::vector<char> buffer;
+    buffer.resize(size);
+    memcpy(buffer.data(), tensor->data.raw, size);
+    _tensors.emplace_back(o, std::move(buffer));
+  }
+}
+
+void TensorDumper::dump(const std::string &filename) const
+{
+  // File layout: a uint32_t tensor count, the tensor indices as raw ints,
+  // then the raw bytes of each tensor back to back
+  // TODO Handle file open/write error
+  std::ofstream file(filename, std::ios::out | std::ios::binary);
+
+  // Write number of tensors
+  uint32_t num_tensors = static_cast<uint32_t>(_tensors.size());
+  file.write(reinterpret_cast<const char *>(&num_tensors), sizeof(num_tensors));
+
+  // Write tensor indices
+  for (const auto &t : _tensors)
+  {
+    file.write(reinterpret_cast<const char *>(&t._index), sizeof(int));
+  }
+
+  // Write data
+  for (const auto &t : _tensors)
+  {
+    file.write(t._data.data(), t._data.size());
+  }
+
+  file.close();
+}
+
+} // end of namespace TFLiteRun
diff --git a/tests/tools/tflite_run/src/tensor_dumper.h b/tests/tools/tflite_run/src/tensor_dumper.h
new file mode 100644
index 000000000..2805f1076
--- /dev/null
+++ b/tests/tools/tflite_run/src/tensor_dumper.h
@@ -0,0 +1,38 @@
+#ifndef __TFLITE_RUN_TENSOR_DUMPER_H__
+#define __TFLITE_RUN_TENSOR_DUMPER_H__
+
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace tflite
+{
+class Interpreter;
+}
+
+namespace TFLiteRun
+{
+
+class TensorDumper
+{
+private:
+  struct Tensor
+  {
+    int _index;
+    std::vector<char> _data;
+
+    Tensor(int index, std::vector<char> &&data) : _index(index), _data(std::move(data)) {}
+  };
+
+public:
+  TensorDumper();
+  void addTensors(tflite::Interpreter &interpreter, const std::vector<int> &indices);
+  void dump(const std::string &filename) const;
+
+private:
+  std::vector<Tensor> _tensors;
+};
+
+} // end of namespace TFLiteRun
+
+#endif // __TFLITE_RUN_TENSOR_DUMPER_H__
diff --git a/tests/tools/tflite_run/src/tensor_loader.cc b/tests/tools/tflite_run/src/tensor_loader.cc
new file mode 100644
index 000000000..934b78f40
--- /dev/null
+++ b/tests/tools/tflite_run/src/tensor_loader.cc
@@ -0,0 +1,67 @@
+#include "tensor_loader.h"
+
+#include <cassert>
+
+#include <fstream>
+#include <vector>
+
+#include "misc/tensor/Shape.h"
+
+namespace TFLiteRun
+{
+
+TensorLoader::TensorLoader(tflite::Interpreter &interpreter)
+    : _interpreter(interpreter), _raw_data(nullptr)
+{
+}
+
+void TensorLoader::load(const std::string &filename)
+{
+  // File layout: a uint32_t tensor count, the tensor indices as raw ints,
+  // then the raw bytes of each tensor back to back (see TensorDumper::dump)
+  // TODO Handle file open/read error
+  std::ifstream file(filename, std::ios::ate | std::ios::binary);
+  size_t file_size = file.tellg();
+  file.seekg(0, std::ios::beg);
+
+  uint32_t num_tensors = 0;
+  file.read(reinterpret_cast<char *>(&num_tensors), sizeof(num_tensors));
+
+  // Use std::vector instead of a variable-length array, which is not standard C++
+  std::vector<int> tensor_indices(num_tensors);
+  file.read(reinterpret_cast<char *>(tensor_indices.data()), num_tensors * sizeof(int));
+
+  const size_t data_size = file_size - sizeof(num_tensors) - num_tensors * sizeof(int);
+
+  // unique_ptr<float[]> (not unique_ptr<float>) so that delete[] is used
+  _raw_data = std::unique_ptr<float[]>(new float[data_size / sizeof(float)]);
+  file.read(reinterpret_cast<char *>(_raw_data.get()), data_size);
+
+  size_t offset = 0;
+  for (const auto &o : tensor_indices)
+  {
+    const TfLiteTensor *tensor = _interpreter.tensor(o);
+
+    // Convert tensor shape to `Shape` from `tensor->dims`
+    nnfw::misc::tensor::Shape shape(static_cast<size_t>(tensor->dims->size));
+    for (int d = 0; d < tensor->dims->size; d++)
+    {
+      shape.dim(d) = tensor->dims->data[d];
+    }
+
+    float *base = _raw_data.get() + offset;
+
+    assert(tensor->bytes % sizeof(float) == 0);
+    offset += (tensor->bytes / sizeof(float));
+
+    _tensor_map.insert(std::make_pair(o, nnfw::tflite::TensorView<float>(shape, base)));
+  }
+
+  // The file size and the total tensor size must match
+  assert(file_size == sizeof(num_tensors) + num_tensors * sizeof(int) + offset * sizeof(float));
+
+  file.close();
+}
+
+const nnfw::tflite::TensorView<float> &TensorLoader::get(int tensor_idx) const
+{
+  auto found = _tensor_map.find(tensor_idx);
+  assert(found != _tensor_map.end());
+  return found->second;
+}
+
+} // end of namespace TFLiteRun
diff --git a/tests/tools/tflite_run/src/tensor_loader.h b/tests/tools/tflite_run/src/tensor_loader.h
new file mode 100644
index 000000000..fc4a37a08
--- /dev/null
+++ b/tests/tools/tflite_run/src/tensor_loader.h
@@ -0,0 +1,35 @@
+#ifndef __TFLITE_RUN_TENSOR_LOADER_H__
+#define __TFLITE_RUN_TENSOR_LOADER_H__
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include "tflite/TensorView.h"
+
+namespace tflite
+{
+class Interpreter;
+}
+
+namespace TFLiteRun
+{
+
+class TensorLoader
+{
+public:
+  TensorLoader(tflite::Interpreter &interpreter);
+  void load(const std::string &filename);
+  const nnfw::tflite::TensorView<float> &get(int tensor_idx) const;
+  size_t getNums() const { return _tensor_map.size(); }
+
+private:
+  tflite::Interpreter &_interpreter;
+  std::unique_ptr<float[]> _raw_data;
+  std::unordered_map<int, nnfw::tflite::TensorView<float>> _tensor_map;
+};
+
+} // end of namespace TFLiteRun
+
+#endif // __TFLITE_RUN_TENSOR_LOADER_H__
diff --git a/tests/tools/tflite_run/src/tflite_run.cc b/tests/tools/tflite_run/src/tflite_run.cc
new file mode 100644
index 000000000..5be6909e5
--- /dev/null
+++ b/tests/tools/tflite_run/src/tflite_run.cc
@@ -0,0 +1,262 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tflite/ext/kernels/register.h"
+#include "tensorflow/contrib/lite/model.h"
+
+#include "bin_image.h"
+#include "args.h"
+#include "tensor_dumper.h"
+#include "tensor_loader.h"
+#include "misc/benchmark.h"
+#include "misc/environment.h"
+#include "misc/fp32.h"
+#include "tflite/Diff.h"
+#include "tflite/Assert.h"
+#include "tflite/Session.h"
+#include "tflite/InterpreterSession.h"
+#include "tflite/NNAPISession.h"
+#include "misc/tensor/IndexIterator.h"
+#include "misc/tensor/Object.h"
+
+#include <cassert>
+#include <cstring>
+#include <iostream>
+#include <chrono>
+#include <algorithm>
+
+using namespace tflite;
+using namespace nnfw::tflite;
+using namespace std::placeholders; // for _1, _2 ...
+
+void print_max_idx(float *f, int size)
+{
+  float *p = std::max_element(f, f + size);
+  std::cout << "max:" << p - f;
+}
+
+int main(const int argc, char **argv)
+{
+  bool use_nnapi = false;
+
+  if (std::getenv("USE_NNAPI") != nullptr)
+  {
+    use_nnapi = true;
+  }
+
+  StderrReporter error_reporter;
+
+  TFLiteRun::Args args(argc, argv);
+
+  auto model = FlatBufferModel::BuildFromFile(args.getTFLiteFilename().c_str(), &error_reporter);
+  if (model == nullptr)
+  {
+    std::cerr << "Cannot create model" << std::endl;
+    return 1;
+  }
+
+  std::unique_ptr<Interpreter> interpreter;
+
+  std::chrono::milliseconds t_prepare(0);
+  std::chrono::milliseconds t_invoke(0);
+
+  nnfw::misc::benchmark::measure(t_prepare) << [&](void) {
+    BuiltinOpResolver resolver;
+
+    InterpreterBuilder builder(*model, resolver);
+
+    TFLITE_ENSURE(builder(&interpreter));
+
+    interpreter->SetNumThreads(1);
+  };
+
+  std::shared_ptr<nnfw::tflite::Session> sess;
+
+  if (use_nnapi)
+  {
+    sess = std::make_shared<nnfw::tflite::NNAPISession>(interpreter.get());
+  }
+  else
+  {
+    sess = std::make_shared<nnfw::tflite::InterpreterSession>(interpreter.get());
+  }
+
+  sess->prepare();
+
+  TFLiteRun::TensorLoader tensor_loader(*interpreter);
+
+  // Load input from dumped tensor file.
+  if (!args.getCompareFilename().empty())
+  {
+    tensor_loader.load(args.getCompareFilename());
+
+    for (const auto &o : interpreter->inputs())
+    {
+      const auto &tensor_view = tensor_loader.get(o);
+      TfLiteTensor *tensor = interpreter->tensor(o);
+
+      memcpy(reinterpret_cast<void *>(tensor->data.f),
+             reinterpret_cast<const void *>(tensor_view._base), tensor->bytes);
+    }
+  }
+  else
+  {
+    const int seed = 1; /* TODO Add an option for seed value */
+    RandomGenerator randgen{seed, 0.0f, 2.0f};
+
+    // No input specified, so fill the input tensors with random values.
+    for (const auto &o : interpreter->inputs())
+    {
+      TfLiteTensor *tensor = interpreter->tensor(o);
+      if (tensor->type == kTfLiteInt32)
+      {
+        // Generate signed 32-bit integer (s32) input
+        auto tensor_view = nnfw::tflite::TensorView<int32_t>::make(*interpreter, o);
+
+        int32_t value = 0;
+
+        nnfw::misc::tensor::iterate(tensor_view.shape())
+            << [&](const nnfw::misc::tensor::Index &ind) {
+                 // TODO Generate random values
+                 // Gather operation: index should be within input coverage.
+                 tensor_view.at(ind) = value;
+                 value++;
+               };
+      }
+      else if (tensor->type == kTfLiteUInt8)
+      {
+        // Generate unsigned 8-bit integer input
+        auto tensor_view = nnfw::tflite::TensorView<uint8_t>::make(*interpreter, o);
+
+        uint8_t value = 0;
+
+        nnfw::misc::tensor::iterate(tensor_view.shape())
+            << [&](const nnfw::misc::tensor::Index &ind) {
+                 // TODO Generate random values
+                 tensor_view.at(ind) = value;
+                 value = (value + 1) & 0xFF;
+               };
+      }
+      else if (tensor->type == kTfLiteBool)
+      {
+        // Generate bool input
+        auto tensor_view = nnfw::tflite::TensorView<bool>::make(*interpreter, o);
+
+        // Select the generate<bool> overload explicitly and bind it to the
+        // random generator, so it can serve as the Object initializer
+        auto fp = static_cast<bool (RandomGenerator::*)(const ::nnfw::misc::tensor::Shape &,
+                                                        const ::nnfw::misc::tensor::Index &)>(
+            &RandomGenerator::generate<bool>);
+        const nnfw::misc::tensor::Object<bool> data(tensor_view.shape(),
+                                                    std::bind(fp, randgen, _1, _2));
+
+        nnfw::misc::tensor::iterate(tensor_view.shape())
+            << [&](const nnfw::misc::tensor::Index &ind) {
+                 const auto value = data.at(ind);
+                 tensor_view.at(ind) = value;
+               };
+      }
+      else
+      {
+        assert(tensor->type == kTfLiteFloat32);
+
+        const float *end = reinterpret_cast<const float *>(tensor->data.raw_const + tensor->bytes);
+        for (float *ptr = tensor->data.f; ptr < end; ptr++)
+        {
+          *ptr = randgen.generate<float>();
+        }
+      }
+    }
+  }
+
+  TFLiteRun::TensorDumper tensor_dumper;
+  // Must be called before `interpreter->Invoke()`
+  tensor_dumper.addTensors(*interpreter, interpreter->inputs());
+
+  std::cout << "input tensor indices = [";
+  for (const auto &o : interpreter->inputs())
+  {
+    std::cout << o << ",";
+  }
+  std::cout << "]" << std::endl;
+
+  nnfw::misc::benchmark::measure(t_invoke) << [&sess](void) {
+    if (!sess->run())
+    {
+      assert(0 && "run failed!");
+    }
+  };
+
+  sess->teardown();
+
+  // Must be called after `interpreter->Invoke()`
+  tensor_dumper.addTensors(*interpreter, interpreter->outputs());
+
+  std::cout << "output tensor indices = [";
+  for (const auto &o : interpreter->outputs())
+  {
+    std::cout << o << "(";
+
+    print_max_idx(interpreter->tensor(o)->data.f, interpreter->tensor(o)->bytes / sizeof(float));
+
+    std::cout << "),";
+  }
+  std::cout << "]" << std::endl;
+
+  std::cout << "Prepare takes " << t_prepare.count() / 1000.0 << " seconds" << std::endl;
+  std::cout << "Invoke takes " << t_invoke.count() / 1000.0 << " seconds" << std::endl;
+
+  if (!args.getDumpFilename().empty())
+  {
+    const std::string &dump_filename = args.getDumpFilename();
+    tensor_dumper.dump(dump_filename);
+    std::cout << "Input/output tensors have been dumped to file \"" << dump_filename << "\"."
+              << std::endl;
+  }
+
+  if (!args.getCompareFilename().empty())
+  {
+    const std::string &compare_filename = args.getCompareFilename();
+    std::cout << "========================================" << std::endl;
+    std::cout << "Comparing the results with \"" << compare_filename << "\"." << std::endl;
+    std::cout << "========================================" << std::endl;
+
+    // TODO Code duplication (copied from RandomTestRunner)
+
+    int tolerance = 1;
+    nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
+
+    auto equals = [tolerance](float lhs, float rhs) {
+      // NOTE Hybrid approach
+      // TODO Allow users to set tolerance for absolute_epsilon_equal
+      if (nnfw::misc::fp32::absolute_epsilon_equal(lhs, rhs))
+      {
+        return true;
+      }
+
+      return nnfw::misc::fp32::epsilon_equal(lhs, rhs, tolerance);
+    };
+
+    nnfw::misc::tensor::Comparator comparator(equals);
+    TfLiteInterpMatchApp app(comparator);
+    bool res = true;
+
+    for (const auto &o : interpreter->outputs())
+    {
+      const auto &expected = tensor_loader.get(o);
+      auto obtained = nnfw::tflite::TensorView<float>::make(*interpreter, o);
+
+      res = res && app.compareSingleTensorView(expected, obtained, o);
+    }
+
+    if (!res)
+    {
+      return 255;
+    }
+  }
+
+  return 0;
+}
diff --git a/tests/tools/tflite_run/src/tflite_test.cc b/tests/tools/tflite_run/src/tflite_test.cc
new file mode 100644
index 000000000..d0d36c229
--- /dev/null
+++ b/tests/tools/tflite_run/src/tflite_test.cc
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gtest/gtest.h"
+
+TEST(TFLite_test_case, simple_test) { EXPECT_EQ(1, 1); }