Diffstat (limited to 'tests/tools')
-rw-r--r--  tests/tools/CMakeLists.txt                                    |   8
-rw-r--r--  tests/tools/nnapi_test/CMakeLists.txt                         |   5
-rw-r--r--  tests/tools/nnapi_test/src/nnapi_test.cc                      |  55
-rw-r--r--  tests/tools/tflite_benchmark/CMakeLists.txt                   |   5
-rw-r--r--  tests/tools/tflite_benchmark/src/tflite_benchmark.cc         | 239
-rw-r--r--  tests/tools/tflite_benchmark_model/.FORMATDENY                |   0
-rw-r--r--  tests/tools/tflite_benchmark_model/CMakeLists.txt             |  18
-rw-r--r--  tests/tools/tflite_benchmark_model/README.md                  | 197
-rw-r--r--  tests/tools/tflite_benchmark_model/benchmark_tflite_model.cc | 401
-rw-r--r--  tests/tools/tflite_benchmark_model/profile_summarizer.cc     | 161
-rw-r--r--  tests/tools/tflite_benchmark_model/stats_calculator.cc       | 317
-rw-r--r--  tests/tools/tflite_run/CMakeLists.txt                         |  26
-rw-r--r--  tests/tools/tflite_run/README.md                              |  91
-rw-r--r--  tests/tools/tflite_run/src/args.cc                            | 113
-rw-r--r--  tests/tools/tflite_run/src/args.h                             |  53
-rw-r--r--  tests/tools/tflite_run/src/bin_image.cc                       |  71
-rw-r--r--  tests/tools/tflite_run/src/bin_image.h                        |  43
-rw-r--r--  tests/tools/tflite_run/src/tensor_dumper.cc                   |  54
-rw-r--r--  tests/tools/tflite_run/src/tensor_dumper.h                    |  38
-rw-r--r--  tests/tools/tflite_run/src/tensor_loader.cc                   |  67
-rw-r--r--  tests/tools/tflite_run/src/tensor_loader.h                    |  35
-rw-r--r--  tests/tools/tflite_run/src/tflite_run.cc                      | 262
-rw-r--r--  tests/tools/tflite_run/src/tflite_test.cc                     |  19
23 files changed, 2278 insertions, 0 deletions
diff --git a/tests/tools/CMakeLists.txt b/tests/tools/CMakeLists.txt
new file mode 100644
index 000000000..b1eea12f9
--- /dev/null
+++ b/tests/tools/CMakeLists.txt
@@ -0,0 +1,8 @@
+set(EXCLUDE_DIR "")
+
+if(OBS_BUILD)
+ list(APPEND EXCLUDE_DIR tflite_benchmark_model)
+ list(APPEND EXCLUDE_DIR tflite_run)
+endif(OBS_BUILD)
+
+add_subdirectories(EXCLUDES ${EXCLUDE_DIR})
diff --git a/tests/tools/nnapi_test/CMakeLists.txt b/tests/tools/nnapi_test/CMakeLists.txt
new file mode 100644
index 000000000..b52f4f34b
--- /dev/null
+++ b/tests/tools/nnapi_test/CMakeLists.txt
@@ -0,0 +1,5 @@
+list(APPEND SOURCES "src/nnapi_test.cc")
+
+add_executable(nnapi_test ${SOURCES})
+target_link_libraries(nnapi_test nnfw_lib_tflite)
+install(TARGETS nnapi_test DESTINATION bin)
diff --git a/tests/tools/nnapi_test/src/nnapi_test.cc b/tests/tools/nnapi_test/src/nnapi_test.cc
new file mode 100644
index 000000000..73e80f01f
--- /dev/null
+++ b/tests/tools/nnapi_test/src/nnapi_test.cc
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tflite/ext/kernels/register.h"
+#include "tensorflow/contrib/lite/model.h"
+
+#include "tflite/interp/FlatBufferBuilder.h"
+#include "tflite/Diff.h"
+
+#include <iostream>
+#include <stdexcept>
+
+using namespace tflite;
+using namespace nnfw::tflite;
+
+int main(const int argc, char **argv)
+{
+ if (argc < 2)
+ {
+ std::cerr << "nnapi_test\n\n";
+ std::cerr << "Usage: " << argv[0] << " <.tflite>\n\n";
+ return 1;
+ }
+
+ const auto filename = argv[1];
+
+ StderrReporter error_reporter;
+
+ auto model = FlatBufferModel::BuildFromFile(filename, &error_reporter);
+
+ const nnfw::tflite::FlatBufferBuilder builder(*model);
+
+ try
+ {
+ return RandomTestRunner::make(0).run(builder);
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << e.what() << std::endl;
+ return 1;
+ }
+}
diff --git a/tests/tools/tflite_benchmark/CMakeLists.txt b/tests/tools/tflite_benchmark/CMakeLists.txt
new file mode 100644
index 000000000..56421a294
--- /dev/null
+++ b/tests/tools/tflite_benchmark/CMakeLists.txt
@@ -0,0 +1,5 @@
+list(APPEND SOURCES "src/tflite_benchmark.cc")
+
+add_executable(tflite_benchmark ${SOURCES})
+target_link_libraries(tflite_benchmark nnfw_lib_tflite tensorflow-lite ${LIB_PTHREAD} dl nnfw_lib_misc)
+install(TARGETS tflite_benchmark DESTINATION bin)
diff --git a/tests/tools/tflite_benchmark/src/tflite_benchmark.cc b/tests/tools/tflite_benchmark/src/tflite_benchmark.cc
new file mode 100644
index 000000000..b77afc189
--- /dev/null
+++ b/tests/tools/tflite_benchmark/src/tflite_benchmark.cc
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tflite/ext/kernels/register.h"
+#include "tensorflow/contrib/lite/model.h"
+
+#include "tflite/Assert.h"
+#include "tflite/Session.h"
+#include "tflite/InterpreterSession.h"
+#include "tflite/NNAPISession.h"
+#include "tflite/Diff.h"
+#include "misc/tensor/IndexIterator.h"
+
+#include <boost/accumulators/accumulators.hpp>
+#include <boost/accumulators/statistics/stats.hpp>
+#include <boost/accumulators/statistics/min.hpp>
+#include <boost/accumulators/statistics/max.hpp>
+#include <boost/accumulators/statistics/mean.hpp>
+
+#include <iostream>
+
+#include "misc/environment.h"
+#include "misc/benchmark.h"
+
+using namespace tflite;
+using namespace nnfw::tflite;
+
+void help(std::ostream &out, const int argc, char **argv)
+{
+ std::string cmd = argv[0];
+ auto pos = cmd.find_last_of("/");
+ if (pos != std::string::npos)
+ cmd = cmd.substr(pos + 1);
+
+ out << "use:" << std::endl << cmd << " <model file name>" << std::endl;
+}
+
+bool checkParams(const int argc, char **argv)
+{
+ if (argc < 2)
+ {
+ help(std::cerr, argc, argv);
+ return false;
+ }
+ return true;
+}
+
+int main(const int argc, char **argv)
+{
+
+ if (!checkParams(argc, argv))
+ {
+ return -1;
+ }
+
+ const auto filename = argv[1];
+
+ const bool use_nnapi = nnfw::misc::get_env_bool("USE_NNAPI");
+ const auto thread_count = nnfw::misc::get_env_int("THREAD", -1);
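+  // Note: these knobs are read from the environment rather than the command line.
+  // A hypothetical invocation: THREAD=4 USE_NNAPI=1 COUNT=50 ./tflite_benchmark model.tflite
+  // (COUNT, read further below, sets the number of measured iterations.)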
+
+ std::cout << "Num threads: " << thread_count << std::endl;
+ if (use_nnapi)
+ {
+ std::cout << "Use NNAPI" << std::endl;
+ }
+
+ StderrReporter error_reporter;
+
+ auto model = FlatBufferModel::BuildFromFile(filename, &error_reporter);
+ if (model == nullptr)
+ {
+ std::cerr << "Cannot create model" << std::endl;
+ return -1;
+ }
+
+ BuiltinOpResolver resolver;
+
+ InterpreterBuilder builder(*model, resolver);
+
+ std::unique_ptr<Interpreter> interpreter;
+
+ try
+ {
+ TFLITE_ENSURE(builder(&interpreter));
+ }
+ catch (const std::exception &e)
+ {
+ std::cerr << e.what() << std::endl;
+ return 1;
+ }
+
+ // Show inputs
+ for (uint32_t n = 0; n < interpreter->inputs().size(); ++n)
+ {
+ // TODO Print shape
+ auto tensor_id = interpreter->inputs().at(n);
+ auto tensor_ptr = interpreter->tensor(tensor_id);
+
+ std::cout << "Input #" << n << ":" << std::endl;
+ std::cout << " Name: " << tensor_ptr->name << std::endl;
+ }
+
+ // Show outputs
+ for (uint32_t n = 0; n < interpreter->outputs().size(); ++n)
+ {
+ // TODO Print shape
+ auto tensor_id = interpreter->outputs().at(n);
+ auto tensor_ptr = interpreter->tensor(tensor_id);
+
+ std::cout << "Output #" << n << ":" << std::endl;
+ std::cout << " Name: " << tensor_ptr->name << std::endl;
+ }
+
+ interpreter->SetNumThreads(thread_count);
+
+ std::shared_ptr<nnfw::tflite::Session> sess;
+
+ if (use_nnapi)
+ {
+ sess = std::make_shared<nnfw::tflite::NNAPISession>(interpreter.get());
+ }
+ else
+ {
+ sess = std::make_shared<nnfw::tflite::InterpreterSession>(interpreter.get());
+ }
+
+ //
+ // Warming-up
+ //
+ for (uint32_t n = 0; n < 3; ++n)
+ {
+ std::chrono::milliseconds elapsed(0);
+
+ sess->prepare();
+
+ for (const auto &id : interpreter->inputs())
+ {
+ TfLiteTensor *tensor = interpreter->tensor(id);
+ if (tensor->type == kTfLiteInt32)
+ {
+        // Generate signed 32-bit integer (s32) input
+ auto tensor_view = nnfw::tflite::TensorView<int32_t>::make(*interpreter, id);
+
+ int32_t value = 0;
+
+ nnfw::misc::tensor::iterate(tensor_view.shape())
+ << [&](const nnfw::misc::tensor::Index &ind) {
+ // TODO Generate random values
+ // Gather operation: index should be within input coverage.
+ tensor_view.at(ind) = value;
+ value++;
+ };
+ }
+ else if (tensor->type == kTfLiteUInt8)
+ {
+ // Generate unsigned 8-bit integer input
+ auto tensor_view = nnfw::tflite::TensorView<uint8_t>::make(*interpreter, id);
+
+ uint8_t value = 0;
+
+ nnfw::misc::tensor::iterate(tensor_view.shape())
+ << [&](const nnfw::misc::tensor::Index &ind) {
+ // TODO Generate random values
+ tensor_view.at(ind) = value;
+ value = (value + 1) & 0xFF;
+ };
+ }
+ else
+ {
+ assert(tensor->type == kTfLiteFloat32);
+
+ const int seed = 1; /* TODO Add an option for seed value */
+ RandomGenerator randgen{seed, 0.0f, 0.2f};
+ const float *end = reinterpret_cast<const float *>(tensor->data.raw_const + tensor->bytes);
+ for (float *ptr = tensor->data.f; ptr < end; ptr++)
+ {
+ *ptr = randgen.generate<float>();
+ }
+ }
+ }
+
+ nnfw::misc::benchmark::measure(elapsed) << [&](void) {
+ if (!sess->run())
+ {
+ assert(0 && "run failed");
+ }
+ };
+ sess->teardown();
+
+ std::cout << "Warming-up " << n << ": " << elapsed.count() << "ms" << std::endl;
+ }
+
+ //
+ // Measure
+ //
+ const auto cnt = nnfw::misc::get_env_int("COUNT", 1);
+
+ using namespace boost::accumulators;
+
+ accumulator_set<double, stats<tag::mean, tag::min, tag::max>> acc;
+
+ for (int n = 0; n < cnt; ++n)
+ {
+ std::chrono::milliseconds elapsed(0);
+
+ sess->prepare();
+ nnfw::misc::benchmark::measure(elapsed) << [&](void) {
+ if (!sess->run())
+ {
+ assert(0 && "run failed");
+ }
+ };
+ sess->teardown();
+
+ acc(elapsed.count());
+
+ std::cout << "Iteration " << n << ": " << elapsed.count() << "ms" << std::endl;
+ }
+
+ std::cout << "--------" << std::endl;
+ std::cout << "Min: " << min(acc) << "ms" << std::endl;
+ std::cout << "Max: " << max(acc) << "ms" << std::endl;
+ std::cout << "Mean: " << mean(acc) << "ms" << std::endl;
+
+ return 0;
+}
diff --git a/tests/tools/tflite_benchmark_model/.FORMATDENY b/tests/tools/tflite_benchmark_model/.FORMATDENY
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tests/tools/tflite_benchmark_model/.FORMATDENY
diff --git a/tests/tools/tflite_benchmark_model/CMakeLists.txt b/tests/tools/tflite_benchmark_model/CMakeLists.txt
new file mode 100644
index 000000000..c48f658c1
--- /dev/null
+++ b/tests/tools/tflite_benchmark_model/CMakeLists.txt
@@ -0,0 +1,18 @@
+if (NOT BUILD_TFLITE_BENCHMARK_MODEL)
+ return()
+endif(NOT BUILD_TFLITE_BENCHMARK_MODEL)
+
+file(GLOB_RECURSE SOURCES "*.cc")
+
+nnfw_find_package(TensorFlowSource REQUIRED)
+set(TENSORFLOW_LITE_BASE "${TensorFlowSource_DIR}/tensorflow/contrib/lite")
+list(APPEND SOURCES "${TENSORFLOW_LITE_BASE}/tools/benchmark/benchmark_main.cc"
+ "${TENSORFLOW_LITE_BASE}/tools/benchmark/benchmark_model.cc"
+ "${TENSORFLOW_LITE_BASE}/tools/benchmark/benchmark_params.cc"
+ "${TENSORFLOW_LITE_BASE}/tools/benchmark/command_line_flags.cc")
+
+add_executable(tflite_benchmark_model ${SOURCES})
+target_compile_definitions(tflite_benchmark_model PUBLIC "TFLITE_PROFILING_ENABLED")
+target_link_libraries(tflite_benchmark_model nnfw_lib_misc nnfw_lib_tflite nnfw_lib_profiling)
+target_link_libraries(tflite_benchmark_model tensorflow-lite ${LIB_PTHREAD} dl)
+install(TARGETS tflite_benchmark_model DESTINATION bin)
diff --git a/tests/tools/tflite_benchmark_model/README.md b/tests/tools/tflite_benchmark_model/README.md
new file mode 100644
index 000000000..8d997639f
--- /dev/null
+++ b/tests/tools/tflite_benchmark_model/README.md
@@ -0,0 +1,197 @@
+# TFLite Model Benchmark Tool
+
+## Description
+
+A simple C++ binary to benchmark a TFLite model and its individual operators,
+both on desktop machines and on Android. The binary takes a TFLite model,
+generates random inputs and then repeatedly runs the model for a specified number
+of runs. Aggregate latency statistics are reported after running the benchmark.
+
+The instructions below are for running the binary on desktop and Android;
+for iOS, please use the
+[iOS benchmark app](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tools/benchmark/ios).
+
+## Parameters
+
+The binary takes the following required parameters:
+
+* `graph`: `string` \
+ The path to the TFLite model file.
+
+and the following optional parameters (an example invocation follows the list):
+
+* `num_threads`: `int` (default=1) \
+  The number of threads to use for running the TFLite interpreter.
+* `warmup_runs`: `int` (default=1) \
+ The number of warmup runs to do before starting the benchmark.
+* `num_runs`: `int` (default=50) \
+ The number of runs. Increase this to reduce variance.
+* `run_delay`: `float` (default=-1.0) \
+ The delay in seconds between subsequent benchmark runs. Non-positive values
+  mean no delay is used.
+* `use_nnapi`: `bool` (default=false) \
+ Whether to use [Android NNAPI](https://developer.android.com/ndk/guides/neuralnetworks/).
+ This API is available on recent Android devices.
+
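+For example, a hypothetical invocation exercising the optional parameters (the
+model path is only a placeholder) could look like:
+
+```
+./benchmark_model \
+  --graph=mobilenet_quant_v1_224.tflite \
+  --num_threads=4 \
+  --warmup_runs=2 \
+  --num_runs=100 \
+  --run_delay=0.5
+```
+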
+## To build/install/run
+
+### On Android:
+
+(0) Refer to https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android to edit the `WORKSPACE` to configure the Android NDK/SDK.
+
+(1) Build for your specific platform, e.g.:
+
+```
+bazel build -c opt \
+ --config=android_arm \
+ --cxxopt='--std=c++11' \
+ tensorflow/contrib/lite/tools/benchmark:benchmark_model
+```
+
+(2) Connect your phone. Push the binary to your phone with adb push
+ (make the directory if required):
+
+```
+adb push bazel-bin/tensorflow/contrib/lite/tools/benchmark/benchmark_model /data/local/tmp
+```
+
+(3) Make the binary executable.
+
+```
+adb shell chmod +x /data/local/tmp/benchmark_model
+```
+
+(4) Push the compute graph that you need to test. For example:
+
+```
+adb push mobilenet_quant_v1_224.tflite /data/local/tmp
+```
+
+(5) Run the benchmark. For example:
+
+```
+adb shell /data/local/tmp/benchmark_model \
+ --graph=/data/local/tmp/mobilenet_quant_v1_224.tflite \
+ --num_threads=4
+```
+
+### On desktop:
+(1) Build the binary:
+
+```
+bazel build -c opt tensorflow/contrib/lite/tools/benchmark:benchmark_model
+```
+
+(2) Run on your compute graph, similar to the Android case but without the need for adb shell.
+For example:
+
+```
+bazel-bin/tensorflow/contrib/lite/tools/benchmark/benchmark_model \
+ --graph=mobilenet_quant_v1_224.tflite \
+ --num_threads=4
+```
+
+The MobileNet graph used as an example here may be downloaded from [here](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip).
+
+
+## Reducing variance between runs on Android
+
+Most modern Android phones use the [ARM big.LITTLE](https://en.wikipedia.org/wiki/ARM_big.LITTLE)
+architecture, where some cores are more power-hungry but faster than the others.
+When running benchmarks on these phones, there can be significant variance
+between different runs of the benchmark. One way to reduce this variance is
+to set the [CPU affinity](https://en.wikipedia.org/wiki/Processor_affinity)
+before running the benchmark. On Android this can be done using the `taskset`
+command.
+For example, to run the benchmark on the big cores of a Pixel 2 with a single
+thread, use the following command:
+
+```
+adb shell taskset f0 /data/local/tmp/benchmark_model \
+ --graph=/data/local/tmp/mobilenet_quant_v1_224.tflite \
+ --num_threads=1
+```
+
+where `f0` is the affinity mask for big cores on Pixel 2.
+Note: The affinity mask varies with the device.
+
+## Profiling model operators
+The benchmark binary also allows you to profile operators and report the execution time of each one. To do this,
+compile the binary with profiling support by passing **--copt=-DTFLITE_PROFILING_ENABLED**
+when building the benchmark.
+For example, to compile with profiling support on Android, add this flag to the previous command:
+
+```
+bazel build -c opt \
+ --config=android_arm \
+ --cxxopt='--std=c++11' \
+ --copt=-DTFLITE_PROFILING_ENABLED \
+ tensorflow/contrib/lite/tools/benchmark:benchmark_model
+```
+This compiles TFLite with profiling enabled; you can then run the benchmark binary as before. The binary will produce detailed statistics for each operation, similar to those shown below:
+
+```
+
+============================== Run Order ==============================
+ [node type] [start] [first] [avg ms] [%] [cdf%] [mem KB] [times called] [Name]
+ CONV_2D 0.000 4.269 4.269 0.107% 0.107% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_0/Relu6]
+ DEPTHWISE_CONV_2D 4.270 2.150 2.150 0.054% 0.161% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_1_depthwise/Relu6]
+ CONV_2D 6.421 6.107 6.107 0.153% 0.314% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_1_pointwise/Relu6]
+ DEPTHWISE_CONV_2D 12.528 1.366 1.366 0.034% 0.348% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_2_depthwise/Relu6]
+ CONV_2D 13.895 4.195 4.195 0.105% 0.454% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_2_pointwise/Relu6]
+ DEPTHWISE_CONV_2D 18.091 1.260 1.260 0.032% 0.485% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_3_depthwise/Relu6]
+ CONV_2D 19.352 6.652 6.652 0.167% 0.652% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_3_pointwise/Relu6]
+ DEPTHWISE_CONV_2D 26.005 0.698 0.698 0.018% 0.670% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_4_depthwise/Relu6]
+ CONV_2D 26.703 3.344 3.344 0.084% 0.754% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_4_pointwise/Relu6]
+ DEPTHWISE_CONV_2D 30.047 0.646 0.646 0.016% 0.770% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_5_depthwise/Relu6]
+ CONV_2D 30.694 5.800 5.800 0.145% 0.915% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_5_pointwise/Relu6]
+ DEPTHWISE_CONV_2D 36.495 0.331 0.331 0.008% 0.924% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_6_depthwise/Relu6]
+ CONV_2D 36.826 2.838 2.838 0.071% 0.995% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_6_pointwise/Relu6]
+ DEPTHWISE_CONV_2D 39.665 0.439 0.439 0.011% 1.006% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_7_depthwise/Relu6]
+ CONV_2D 40.105 5.293 5.293 0.133% 1.139% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_7_pointwise/Relu6]
+ DEPTHWISE_CONV_2D 45.399 0.352 0.352 0.009% 1.147% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_8_depthwise/Relu6]
+ CONV_2D 45.752 5.322 5.322 0.133% 1.281% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_8_pointwise/Relu6]
+ DEPTHWISE_CONV_2D 51.075 0.357 0.357 0.009% 1.290% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_9_depthwise/Relu6]
+ CONV_2D 51.432 5.693 5.693 0.143% 1.433% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_9_pointwise/Relu6]
+ DEPTHWISE_CONV_2D 57.126 0.366 0.366 0.009% 1.442% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_10_depthwise/Relu6]
+ CONV_2D 57.493 5.472 5.472 0.137% 1.579% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_10_pointwise/Relu6]
+ DEPTHWISE_CONV_2D 62.966 0.364 0.364 0.009% 1.588% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_11_depthwise/Relu6]
+ CONV_2D 63.330 5.404 5.404 0.136% 1.724% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_11_pointwise/Relu6]
+ DEPTHWISE_CONV_2D 68.735 0.155 0.155 0.004% 1.728% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_12_depthwise/Relu6]
+ CONV_2D 68.891 2.970 2.970 0.074% 1.802% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_12_pointwise/Relu6]
+ DEPTHWISE_CONV_2D 71.862 0.206 0.206 0.005% 1.807% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_13_depthwise/Relu6]
+ CONV_2D 72.069 5.888 5.888 0.148% 1.955% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_13_pointwise/Relu6]
+ AVERAGE_POOL_2D 77.958 0.036 0.036 0.001% 1.956% 0.000 0 [MobilenetV1/Logits/AvgPool_1a/AvgPool]
+ CONV_2D 77.994 1.445 1.445 0.036% 1.992% 0.000 0 [MobilenetV1/Logits/Conv2d_1c_1x1/BiasAdd]
+ RESHAPE 79.440 0.002 0.002 0.000% 1.992% 0.000 0 [MobilenetV1/Predictions/Reshape]
+ SOFTMAX 79.443 0.029 0.029 0.001% 1.993% 0.000 0 [MobilenetV1/Predictions/Softmax]
+
+============================== Top by Computation Time ==============================
+ [node type] [start] [first] [avg ms] [%] [cdf%] [mem KB] [times called] [Name]
+ CONV_2D 19.352 6.652 6.652 0.167% 0.167% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_3_pointwise/Relu6]
+ CONV_2D 6.421 6.107 6.107 0.153% 0.320% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_1_pointwise/Relu6]
+ CONV_2D 72.069 5.888 5.888 0.148% 0.468% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_13_pointwise/Relu6]
+ CONV_2D 30.694 5.800 5.800 0.145% 0.613% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_5_pointwise/Relu6]
+ CONV_2D 51.432 5.693 5.693 0.143% 0.756% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_9_pointwise/Relu6]
+ CONV_2D 57.493 5.472 5.472 0.137% 0.893% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_10_pointwise/Relu6]
+ CONV_2D 63.330 5.404 5.404 0.136% 1.029% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_11_pointwise/Relu6]
+ CONV_2D 45.752 5.322 5.322 0.133% 1.162% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_8_pointwise/Relu6]
+ CONV_2D 40.105 5.293 5.293 0.133% 1.295% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_7_pointwise/Relu6]
+ CONV_2D 0.000 4.269 4.269 0.107% 1.402% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_0/Relu6]
+
+Number of nodes executed: 31
+============================== Summary by node type ==============================
+ [Node type] [count] [avg ms] [avg %] [cdf %] [mem KB] [times called]
+ CONV_2D 15 1.406 89.270% 89.270% 0.000 0
+ DEPTHWISE_CONV_2D 13 0.169 10.730% 100.000% 0.000 0
+ SOFTMAX 1 0.000 0.000% 100.000% 0.000 0
+ RESHAPE 1 0.000 0.000% 100.000% 0.000 0
+ AVERAGE_POOL_2D 1 0.000 0.000% 100.000% 0.000 0
+
+Timings (microseconds): count=50 first=79449 curr=81350 min=77385 max=88213 avg=79732 std=1929
+Memory (bytes): count=0
+31 nodes observed
+
+
+Average inference timings in us: Warmup: 83235, Init: 38467, no stats: 79760.9
+```
diff --git a/tests/tools/tflite_benchmark_model/benchmark_tflite_model.cc b/tests/tools/tflite_benchmark_model/benchmark_tflite_model.cc
new file mode 100644
index 000000000..efc8bae52
--- /dev/null
+++ b/tests/tools/tflite_benchmark_model/benchmark_tflite_model.cc
@@ -0,0 +1,401 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h"
+
+#include <cstdarg>
+#include <cstdlib>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+#ifdef TFLITE_FLEX
+#include "tensorflow/contrib/lite/delegates/flex/delegate.h"
+#endif // TFLITE_FLEX
+#include "tflite/ext/kernels/register.h"
+#include "tensorflow/contrib/lite/model.h"
+#include "tensorflow/contrib/lite/op_resolver.h"
+#include "tensorflow/contrib/lite/string_util.h"
+#include "tensorflow/contrib/lite/tools/benchmark/logging.h"
+
+// For profiling nnapi_delegate
+#include "profiling/profiling.h"
+#include "tflite/ext/nnapi_delegate.h"
+
+namespace {
+ nnfw::tflite::NNAPIDelegate nnfw_delegate_;
+}
+
+#ifdef TFLITE_CUSTOM_OPS_HEADER
+void RegisterSelectedOps(::tflite::MutableOpResolver* resolver);
+#endif
+
+namespace tflite {
+namespace benchmark {
+
+void ProfilingListener::SetInterpreter(tflite::Interpreter* interpreter) {
+ TFLITE_BENCHMARK_CHECK(interpreter);
+ interpreter_ = interpreter;
+ interpreter_->SetProfiler(&profiler_);
+}
+
+void ProfilingListener::OnSingleRunStart(RunType run_type) {
+ if (run_type == REGULAR) {
+ profiler_.Reset();
+ profiler_.StartProfiling();
+ }
+}
+
+void ProfilingListener::OnBenchmarkEnd(const BenchmarkResults& results) {
+ if (has_profiles_) {
+ TFLITE_LOG(INFO) << summarizer_.GetOutputString();
+ }
+}
+
+void ProfilingListener::OnSingleRunEnd() {
+ profiler_.StopProfiling();
+ auto profile_events = profiler_.GetProfileEvents();
+ has_profiles_ = !profile_events.empty();
+ summarizer_.ProcessProfiles(profile_events, *interpreter_);
+}
+
+namespace {
+
+std::vector<std::string> Split(const std::string& str, const char delim) {
+ std::istringstream input(str);
+ std::vector<std::string> results;
+ std::string item;
+ while (std::getline(input, item, delim)) {
+ results.push_back(item);
+ }
+ return results;
+}
+
+template <typename T>
+bool SplitAndParse(const std::string& str, char delim, std::vector<T>* values) {
+ std::istringstream input(str);
+ bool first = true;
+ while (!input.eof()) {
+ if (!first) {
+ char c;
+ input >> c;
+ if (c != delim) {
+ return false;
+ }
+ } else {
+ first = false;
+ }
+ T val;
+ input >> val;
+ if (!input.eof() && !input.good()) {
+ return false;
+ }
+ values->push_back(val);
+ }
+ return true;
+}
+
+template <typename T>
+void FillRandomValue(T* ptr, const std::vector<int>& sizes,
+ const std::function<T()>& random_func) {
+ int num_elements = 1;
+ for (int dim : sizes) {
+ num_elements *= dim;
+ }
+ for (int i = 0; i < num_elements; ++i) {
+ *ptr++ = random_func();
+ }
+}
+
+void FillRandomString(tflite::DynamicBuffer* buffer,
+ const std::vector<int>& sizes,
+ const std::function<string()>& random_func) {
+ int num_elements = 1;
+ for (int dim : sizes) {
+ num_elements *= dim;
+ }
+ for (int i = 0; i < num_elements; ++i) {
+ auto str = random_func();
+ buffer->AddString(str.data(), str.length());
+ }
+}
+
+bool PopulateInputLayerInfo(
+ const string& names_string, const string& shapes_string,
+ std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info) {
+ std::vector<std::string> names = Split(names_string, ',');
+ std::vector<std::string> shapes = Split(shapes_string, ':');
+
+ if (names.size() != shapes.size()) {
+ TFLITE_LOG(ERROR) << "The number of items in"
+ << " --input_layer_shape (" << shapes_string << ", with "
+ << shapes.size() << " items)"
+ << " must match the number of items in"
+ << " --input_layer (" << names_string << ", with "
+ << names.size() << " items)."
+ << " For example --input_layer=input1,input2"
+ << " --input_layer_shape=1,224,224,4:1,20";
+ return false;
+ }
+
+ for (int i = 0; i < names.size(); ++i) {
+ info->push_back(BenchmarkTfLiteModel::InputLayerInfo());
+ BenchmarkTfLiteModel::InputLayerInfo& input = info->back();
+
+ input.name = names[i];
+
+ TFLITE_BENCHMARK_CHECK(SplitAndParse(shapes[i], ',', &input.shape))
+ << "Incorrect size string specified: " << shapes[i];
+ for (int dim : input.shape) {
+ if (dim == -1) {
+ TFLITE_LOG(ERROR)
+ << "Any unknown sizes in the shapes (-1's) must be replaced"
+ << " with the size you want to benchmark with.";
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+BenchmarkParams GetDefaultParams() {
+ BenchmarkParams default_params = BenchmarkModel::DefaultParams();
+ default_params.AddParam("graph", BenchmarkParam::Create<std::string>(""));
+ default_params.AddParam("input_layer",
+ BenchmarkParam::Create<std::string>(""));
+ default_params.AddParam("input_layer_shape",
+ BenchmarkParam::Create<std::string>(""));
+ default_params.AddParam("use_nnapi", BenchmarkParam::Create<bool>(false));
+ return default_params;
+}
+
+} // namespace
+
+BenchmarkTfLiteModel::BenchmarkTfLiteModel()
+ : BenchmarkModel(GetDefaultParams()) {
+ AddListener(&profiling_listener_);
+}
+
+BenchmarkTfLiteModel::BenchmarkTfLiteModel(BenchmarkParams params)
+ : BenchmarkModel(std::move(params)) {
+ AddListener(&profiling_listener_);
+}
+
+std::vector<Flag> BenchmarkTfLiteModel::GetFlags() {
+ std::vector<Flag> flags = BenchmarkTfLiteModel::BenchmarkModel::GetFlags();
+ std::vector<Flag> specific_flags = {
+ CreateFlag<std::string>("graph", &params_, "graph file name"),
+ CreateFlag<std::string>("input_layer", &params_, "input layer names"),
+ CreateFlag<std::string>("input_layer_shape", &params_,
+ "input layer shape"),
+ CreateFlag<bool>("use_nnapi", &params_, "use nnapi api")};
+
+ flags.insert(flags.end(), specific_flags.begin(), specific_flags.end());
+ return flags;
+}
+
+void BenchmarkTfLiteModel::LogParams() {
+ BenchmarkModel::LogParams();
+ TFLITE_LOG(INFO) << "Graph: [" << params_.Get<std::string>("graph") << "]";
+ TFLITE_LOG(INFO) << "Input layers: ["
+ << params_.Get<std::string>("input_layer") << "]";
+ TFLITE_LOG(INFO) << "Input shapes: ["
+ << params_.Get<std::string>("input_layer_shape") << "]";
+ TFLITE_LOG(INFO) << "Use nnapi : [" << params_.Get<bool>("use_nnapi") << "]";
+}
+
+bool BenchmarkTfLiteModel::ValidateParams() {
+ if (params_.Get<std::string>("graph").empty()) {
+ TFLITE_LOG(ERROR)
+ << "Please specify the name of your TF Lite input file with --graph";
+ return false;
+ }
+ return PopulateInputLayerInfo(params_.Get<std::string>("input_layer"),
+ params_.Get<std::string>("input_layer_shape"),
+ &inputs);
+}
+
+uint64_t BenchmarkTfLiteModel::ComputeInputBytes() {
+ TFLITE_BENCHMARK_CHECK(interpreter);
+ uint64_t total_input_bytes = 0;
+ for (int input : interpreter->inputs()) {
+ auto* t = interpreter->tensor(input);
+ total_input_bytes += t->bytes;
+ }
+ return total_input_bytes;
+}
+
+void BenchmarkTfLiteModel::PrepareInputsAndOutputs() {
+ auto interpreter_inputs = interpreter->inputs();
+ // Set the values of the input tensors.
+ for (int j = 0; j < inputs.size(); ++j) {
+ const InputLayerInfo& input = inputs[j];
+ int i = interpreter_inputs[j];
+ TfLiteTensor* t = interpreter->tensor(i);
+ std::vector<int> sizes = input.shape;
+
+    // TODO(ahentz): below we ignore the 0-th dimension (number of batches).
+ if (t->type == kTfLiteFloat32) {
+ FillRandomValue<float>(
+ interpreter->typed_tensor<float>(i),
+ std::vector<int>(sizes.begin() + 1, sizes.end()),
+ []() { return static_cast<float>(rand()) / RAND_MAX - 0.5f; });
+ } else if (t->type == kTfLiteInt32) {
+ // TODO(yunluli): This is currently only used for handling embedding input
+ // for speech models. Generalize if necessary.
+ FillRandomValue<int32_t>(
+ interpreter->typed_tensor<int32_t>(i),
+ std::vector<int32_t>(sizes.begin() + 1, sizes.end()),
+ []() { return static_cast<int32_t>(rand()) % 100; });
+ } else if (t->type == kTfLiteUInt8) {
+ FillRandomValue<uint8_t>(
+ interpreter->typed_tensor<uint8_t>(i),
+ std::vector<int>(sizes.begin() + 1, sizes.end()),
+ []() { return static_cast<uint8_t>(rand()) % 255; });
+ } else if (t->type == kTfLiteString) {
+ tflite::DynamicBuffer buffer;
+ FillRandomString(&buffer, sizes, []() {
+ return "we're have some friends over saturday to hang out in the yard";
+ });
+ buffer.WriteToTensor(interpreter->tensor(i));
+ } else {
+ TFLITE_LOG(FATAL) << "Don't know how to populate tensor " << t->name
+ << " of type " << t->type;
+ }
+ }
+}
+
+void BenchmarkTfLiteModel::Init() {
+ std::string graph = params_.Get<std::string>("graph");
+ model = tflite::FlatBufferModel::BuildFromFile(graph.c_str());
+ if (!model) {
+ TFLITE_LOG(FATAL) << "Failed to mmap model " << graph;
+ }
+ TFLITE_LOG(INFO) << "Loaded model " << graph;
+ model->error_reporter();
+ TFLITE_LOG(INFO) << "resolved reporter";
+
+#ifdef TFLITE_CUSTOM_OPS_HEADER
+ tflite::MutableOpResolver resolver;
+ RegisterSelectedOps(&resolver);
+#else
+ nnfw::tflite::BuiltinOpResolver resolver;
+#endif
+
+ tflite::InterpreterBuilder(*model, resolver)(&interpreter);
+ if (!interpreter) {
+ TFLITE_LOG(FATAL) << "Failed to construct interpreter";
+ }
+ profiling_listener_.SetInterpreter(interpreter.get());
+ ::profiling::Context::get().setProfiler(interpreter->GetProfiler());
+
+ auto enable_sync = std::getenv("PROFILING_OP_SYNC");
+ if (enable_sync && std::atoi(enable_sync) != 0)
+ {
+ ::profiling::Context::get().setSync();
+ }
+
+ const int32_t num_threads = params_.Get<int32_t>("num_threads");
+
+ if (num_threads != -1) {
+ interpreter->SetNumThreads(num_threads);
+ }
+
+ bool use_nnapi = params_.Get<bool>("use_nnapi");
+
+ interpreter->UseNNAPI(use_nnapi);
+
+ if (use_nnapi) {
+ if (nnfw_delegate_.BuildGraph(interpreter.get()) != kTfLiteOk) {
+ TFLITE_LOG(FATAL) << "Failed to BuildGraph!";
+ }
+ }
+
+#ifdef TFLITE_FLEX
+ TFLITE_LOG(INFO) << "Instantiating Flex Delegate";
+ delegate_ = FlexDelegate::Create();
+ if (delegate_) {
+ interpreter->ModifyGraphWithDelegate(delegate_.get(),
+ /*allow_dynamic_tensors=*/true);
+ }
+#endif // TFLITE_FLEX
+
+ auto interpreter_inputs = interpreter->inputs();
+
+ if (!inputs.empty()) {
+ TFLITE_BENCHMARK_CHECK_EQ(inputs.size(), interpreter_inputs.size())
+ << "Inputs mismatch: Model inputs #:" << interpreter_inputs.size()
+ << " expected: " << inputs.size();
+ }
+
+ // TFLITE_BENCHMARK_CHECK that all names and types match
+ for (int j = 0; j < inputs.size(); ++j) {
+ const InputLayerInfo& input = inputs[j];
+ int i = interpreter_inputs[j];
+ TfLiteTensor* t = interpreter->tensor(i);
+ TFLITE_BENCHMARK_CHECK_EQ(t->name, input.name)
+ << "Tensor # " << i << " is named " << t->name << " but flags call it "
+ << input.name;
+ }
+
+ // Resize all non-string tensors.
+ for (int j = 0; j < inputs.size(); ++j) {
+ const InputLayerInfo& input = inputs[j];
+ int i = interpreter_inputs[j];
+ TfLiteTensor* t = interpreter->tensor(i);
+ if (t->type != kTfLiteString) {
+ interpreter->ResizeInputTensor(i, input.shape);
+ }
+ }
+
+ if (interpreter->AllocateTensors() != kTfLiteOk) {
+ TFLITE_LOG(FATAL) << "Failed to allocate tensors!";
+ }
+}
+
+void BenchmarkTfLiteModel::RunImpl() {
+ bool use_nnapi = params_.Get<bool>("use_nnapi");
+ if (use_nnapi) {
+ if (nnfw_delegate_.Invoke(interpreter.get()) != kTfLiteOk) {
+ TFLITE_LOG(FATAL) << "Failed to invoke!";
+ }
+ } else {
+ if (interpreter->Invoke() != kTfLiteOk) {
+ TFLITE_LOG(FATAL) << "Failed to invoke!";
+ }
+ }
+}
+
+} // namespace benchmark
+} // namespace tflite
diff --git a/tests/tools/tflite_benchmark_model/profile_summarizer.cc b/tests/tools/tflite_benchmark_model/profile_summarizer.cc
new file mode 100644
index 000000000..ce19b0c98
--- /dev/null
+++ b/tests/tools/tflite_benchmark_model/profile_summarizer.cc
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/lite/profiling/profile_summarizer.h"
+
+#include <sstream>
+
+#include "tensorflow/contrib/lite/schema/schema_generated.h"
+
+namespace tflite {
+namespace profiling {
+namespace {
+
+struct OperatorDetails {
+ std::string name;
+ std::vector<std::string> inputs;
+ std::vector<std::string> outputs;
+};
+
+std::string GetTensorName(const tflite::Interpreter& interpreter,
+ int tensor_index) {
+ const auto tensor = interpreter.tensor(tensor_index);
+ if (tensor == nullptr || tensor->name == nullptr) {
+ return "Unknown";
+ }
+ return tensor->name;
+}
+std::vector<std::string> GetTensorNames(const tflite::Interpreter& interpreter,
+ const TfLiteIntArray* tensor_indices) {
+ std::vector<std::string> tensors;
+ tensors.reserve(tensor_indices->size);
+ for (int i = 0; i < tensor_indices->size; i++) {
+ tensors.push_back(GetTensorName(interpreter, tensor_indices->data[i]));
+ }
+ return tensors;
+}
+
+std::string ToString(const std::vector<std::string>& str_vector) {
+ std::stringstream stream;
+ stream << "[";
+ bool first = true;
+ for (const auto& s : str_vector) {
+ if (!first) {
+ stream << ", ";
+ } else {
+ first = false;
+ }
+ stream << s;
+ }
+ stream << "]";
+ return stream.str();
+}
+
+OperatorDetails GetOperatorDetails(const tflite::Interpreter& interpreter,
+ int node_index) {
+ auto node_reg = interpreter.node_and_registration(node_index);
+ auto inputs = node_reg->first.inputs;
+ auto outputs = node_reg->first.outputs;
+ int code = node_reg->second.builtin_code;
+ const char* op_name = nullptr;
+ if (code == tflite::BuiltinOperator_CUSTOM) {
+ const char* custom_name = node_reg->second.custom_name;
+ op_name = custom_name ? custom_name : "UnknownCustomOp";
+ } else {
+ op_name = tflite::EnumNamesBuiltinOperator()[code];
+ }
+ const char* profiling_string =
+ interpreter.OpProfilingString(node_reg->second, &node_reg->first);
+ OperatorDetails details;
+ details.name = op_name;
+ if (profiling_string) {
+ details.name += ":" + std::string(profiling_string);
+ }
+ details.inputs = GetTensorNames(interpreter, inputs);
+ details.outputs = GetTensorNames(interpreter, outputs);
+ return details;
+}
+
+tensorflow::StatSummarizerOptions GetProfileSummarizerOptions() {
+ auto options = tensorflow::StatSummarizerOptions();
+ options.show_summary = true;
+ options.show_memory = false;
+ return options;
+}
+
+} // namespace
+
+ProfileSummarizer::ProfileSummarizer()
+ : stats_calculator_(
+ new ::tensorflow::StatsCalculator(GetProfileSummarizerOptions())) {}
+
+void ProfileSummarizer::ProcessProfiles(
+ const std::vector<const ProfileEvent*>& profile_stats,
+ const tflite::Interpreter& interpreter) {
+ std::vector<const ProfileEvent*> events;
+ std::copy_if(profile_stats.begin(), profile_stats.end(),
+ std::back_inserter(events), [](const ProfileEvent* e) {
+ return e->event_type ==
+ ProfileEvent::EventType::OPERATOR_INVOKE_EVENT &&
+ e->end_timestamp_us >= e->begin_timestamp_us;
+ });
+ // Sort with begin_time.
+ std::sort(events.begin(), events.end(),
+ [](const ProfileEvent* const& a, const ProfileEvent* const& b) {
+ return a->begin_timestamp_us < b->begin_timestamp_us;
+ });
+ if (events.empty()) {
+ return;
+ }
+
+ int64_t base_start_us = events[0]->begin_timestamp_us;
+ int node_num = 0;
+ int64_t curr_total_us = 0;
+ int prev_op_idx = -1;
+ int child_op_no = 1;
+ for (auto event : events) {
+ auto op_details = GetOperatorDetails(interpreter, event->event_metadata);
+ bool from_same_op = (prev_op_idx == event->event_metadata);
+ child_op_no = from_same_op ? child_op_no + 1 : 1;
+ auto node_name = ToString(op_details.outputs) + "#" + std::to_string(child_op_no);
+ int64_t start_us = event->begin_timestamp_us - base_start_us;
+ int64_t node_exec_time =
+ event->end_timestamp_us - event->begin_timestamp_us;
+ stats_calculator_->AddNodeStats(node_name, op_details.name, node_num,
+ start_us, node_exec_time, 0 /*memory */);
+ curr_total_us += node_exec_time;
+ ++node_num;
+ prev_op_idx = event->event_metadata;
+ }
+ stats_calculator_->UpdateRunTotalUs(curr_total_us);
+}
+} // namespace profiling
+} // namespace tflite
diff --git a/tests/tools/tflite_benchmark_model/stats_calculator.cc b/tests/tools/tflite_benchmark_model/stats_calculator.cc
new file mode 100644
index 000000000..578650701
--- /dev/null
+++ b/tests/tools/tflite_benchmark_model/stats_calculator.cc
@@ -0,0 +1,317 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/util/stats_calculator.h"
+
+#include <iomanip>
+#include <map>
+#include <queue>
+#include <sstream>
+#include <string>
+#include <algorithm>
+
+namespace tensorflow {
+
+StatsCalculator::StatsCalculator(const StatSummarizerOptions& options)
+ : options_(options) {}
+
+std::string StatsCalculator::GetShortSummary() const {
+ std::stringstream stream;
+ stream << "Timings (microseconds): ";
+ run_total_us_.OutputToStream(&stream);
+ stream << std::endl;
+
+ stream << "Memory (bytes): ";
+ memory_.OutputToStream(&stream);
+ stream << std::endl;
+
+ stream << details_.size() << " nodes observed" << std::endl;
+ return stream.str();
+}
+
+std::ostream& InitField(std::ostream& stream, int width) {
+ stream << "\t" << std::right << std::setw(width) << std::fixed
+ << std::setprecision(3);
+ return stream;
+}
+
+std::string StatsCalculator::HeaderString(const std::string& title) const {
+ std::stringstream stream;
+
+ stream << "============================== " << title
+ << " ==============================" << std::endl;
+
+ InitField(stream, 24) << "[node type]";
+ InitField(stream, 9) << "[start]";
+ InitField(stream, 9) << "[first]";
+ InitField(stream, 9) << "[avg ms]";
+ InitField(stream, 8) << "[%]";
+ InitField(stream, 8) << "[cdf%]";
+ InitField(stream, 10) << "[mem KB]";
+ InitField(stream, 9) << "[times called]";
+ stream << "\t"
+ << "[Name]";
+ return stream.str();
+}
+
+std::string StatsCalculator::ColumnString(const Detail& detail,
+ const int64_t cumulative_stat_on_node,
+ const Stat<int64_t>& stat) const {
+ const double start_ms = detail.start_us.avg() / 1000.0;
+ const double first_time_ms = detail.rel_end_us.first() / 1000.0;
+ const double avg_time_ms = detail.rel_end_us.avg() / 1000.0;
+ const double percentage = detail.rel_end_us.sum() * 100.0 / stat.sum();
+ const double cdf_percentage = (cumulative_stat_on_node * 100.0f) / stat.sum();
+ const int64_t times_called = detail.times_called / num_runs();
+
+ std::stringstream stream;
+ InitField(stream, 24) << detail.type;
+ InitField(stream, 9) << start_ms;
+ InitField(stream, 9) << first_time_ms;
+ InitField(stream, 9) << avg_time_ms;
+ InitField(stream, 7) << percentage << "%";
+ InitField(stream, 7) << cdf_percentage << "%";
+ InitField(stream, 10) << detail.mem_used.newest() / 1000.0;
+ InitField(stream, 9) << times_called;
+ stream << "\t" << detail.name;
+
+ return stream.str();
+}
+
+void StatsCalculator::OrderNodesByMetric(
+ SortingMetric metric, std::vector<const Detail*>* details) const {
+ std::priority_queue<std::pair<std::string, const Detail*>> sorted_list;
+ const int num_nodes = details_.size();
+
+ for (const auto& det : details_) {
+ const Detail* detail = &(det.second);
+ std::stringstream stream;
+ stream << std::setw(20) << std::right << std::setprecision(10)
+ << std::fixed;
+
+ switch (metric) {
+ case BY_NAME:
+ stream << detail->name;
+ break;
+ case BY_RUN_ORDER:
+ stream << num_nodes - detail->run_order;
+ break;
+ case BY_TIME:
+ stream << detail->rel_end_us.avg();
+ break;
+ case BY_MEMORY:
+ stream << detail->mem_used.avg();
+ break;
+ case BY_TYPE:
+ stream << detail->type;
+ break;
+ default:
+ stream << "";
+ break;
+ }
+
+ sorted_list.emplace(stream.str(), detail);
+ }
+
+ while (!sorted_list.empty()) {
+ auto entry = sorted_list.top();
+ sorted_list.pop();
+ details->push_back(entry.second);
+ }
+}
+
+void StatsCalculator::ComputeStatsByType(
+ std::map<std::string, int64_t>* node_type_map_count,
+ std::map<std::string, int64_t>* node_type_map_time,
+ std::map<std::string, int64_t>* node_type_map_memory,
+ std::map<std::string, int64_t>* node_type_map_times_called,
+ int64_t* accumulated_us) const {
+ int64_t run_count = run_total_us_.count();
+
+ for (const auto& det : details_) {
+ const std::string node_name = det.first;
+ const Detail& detail = det.second;
+
+ int64_t curr_time_val =
+ static_cast<int64_t>(detail.rel_end_us.sum() / run_count);
+ *accumulated_us += curr_time_val;
+
+ int64_t curr_memory_val = detail.mem_used.newest();
+
+ const std::string& node_type = detail.type;
+
+ const std::string sharp1("#1");
+ bool first = std::mismatch(sharp1.rbegin(), sharp1.rend(), node_name.rbegin()).first == sharp1.rend();
+
+ if (first) {
+ (*node_type_map_count)[node_type] += 1;
+ (*node_type_map_times_called)[node_type] += detail.times_called / run_count;
+ }
+ (*node_type_map_time)[node_type] += curr_time_val;
+ (*node_type_map_memory)[node_type] += curr_memory_val;
+ }
+}
+
+std::string StatsCalculator::GetStatsByNodeType() const {
+ std::stringstream stream;
+
+ stream << "Number of nodes executed: " << details_.size() << std::endl;
+
+ stream << "============================== Summary by node type "
+ "=============================="
+ << std::endl;
+
+ std::map<std::string, int64_t> node_type_map_count;
+ std::map<std::string, int64_t> node_type_map_time;
+ std::map<std::string, int64_t> node_type_map_memory;
+ std::map<std::string, int64_t> node_type_map_times_called;
+ int64_t accumulated_us = 0;
+
+ ComputeStatsByType(&node_type_map_count, &node_type_map_time,
+ &node_type_map_memory, &node_type_map_times_called,
+ &accumulated_us);
+
+ // Sort them.
+ std::priority_queue<std::pair<int64_t, std::pair<std::string, int64_t>>>
+ timings;
+ for (const auto& node_type : node_type_map_time) {
+ const int64_t mem_used = node_type_map_memory[node_type.first];
+ timings.emplace(node_type.second,
+ std::pair<std::string, int64_t>(node_type.first, mem_used));
+ }
+
+ InitField(stream, 24) << "[Node type]";
+ InitField(stream, 9) << "[count]";
+ InitField(stream, 10) << "[avg ms]";
+ InitField(stream, 11) << "[avg %]";
+ InitField(stream, 11) << "[cdf %]";
+ InitField(stream, 10) << "[mem KB]";
+ InitField(stream, 10) << "[times called]";
+ stream << std::endl;
+
+ float cdf = 0.0f;
+ while (!timings.empty()) {
+ auto entry = timings.top();
+ timings.pop();
+
+ const std::string node_type = entry.second.first;
+ const float memory = entry.second.second / 1000.0f;
+
+ const int64_t node_type_total_us = entry.first;
+ const float time_per_run_ms = node_type_total_us / 1000.0f;
+
+ const float percentage =
+ ((entry.first / static_cast<float>(accumulated_us)) * 100.0f);
+ cdf += percentage;
+
+ InitField(stream, 24) << node_type;
+ InitField(stream, 9) << node_type_map_count[node_type];
+ InitField(stream, 10) << time_per_run_ms;
+ InitField(stream, 10) << percentage << "%";
+ InitField(stream, 10) << cdf << "%";
+ InitField(stream, 10) << memory;
+ InitField(stream, 9) << node_type_map_times_called[node_type];
+ stream << std::endl;
+ }
+ stream << std::endl;
+ return stream.str();
+}
+
+std::string StatsCalculator::GetStatsByMetric(const std::string& title,
+ SortingMetric sorting_metric,
+ int num_stats) const {
+ std::vector<const Detail*> details;
+ OrderNodesByMetric(sorting_metric, &details);
+
+ double cumulative_stat_on_node = 0;
+
+ std::stringstream stream;
+ stream << HeaderString(title) << std::endl;
+ int stat_num = 0;
+ for (auto detail : details) {
+ ++stat_num;
+ if (num_stats > 0 && stat_num > num_stats) {
+ break;
+ }
+
+ // TODO(andrewharp): Make this keep track of the particular metric for cdf.
+ cumulative_stat_on_node += detail->rel_end_us.sum();
+ stream << ColumnString(*detail, cumulative_stat_on_node, run_total_us_)
+ << std::endl;
+ }
+ stream << std::endl;
+ return stream.str();
+}
+
+std::string StatsCalculator::GetOutputString() const {
+ std::stringstream stream;
+ if (options_.show_run_order) {
+ stream << GetStatsByMetric("Run Order", BY_RUN_ORDER,
+ options_.run_order_limit);
+ }
+ if (options_.show_time) {
+ stream << GetStatsByMetric("Top by Computation Time", BY_TIME,
+ options_.time_limit);
+ }
+ if (options_.show_memory) {
+ stream << GetStatsByMetric("Top by Memory Use", BY_MEMORY,
+ options_.memory_limit);
+ }
+ if (options_.show_type) {
+ stream << GetStatsByNodeType();
+ }
+ if (options_.show_summary) {
+ stream << GetShortSummary() << std::endl;
+ }
+ return stream.str();
+}
+
+void StatsCalculator::AddNodeStats(const std::string& name,
+ const std::string& type, int64_t run_order,
+ int64_t start_us, int64_t rel_end_us,
+ int64_t mem_used) {
+ Detail* detail = nullptr;
+ if (details_.find(name) == details_.end()) {
+ details_.insert({name, {}});
+ detail = &details_.at(name);
+ detail->type = type;
+ detail->name = name;
+ detail->run_order = run_order;
+ } else {
+ detail = &details_.at(name);
+ }
+ detail->start_us.UpdateStat(start_us);
+ detail->rel_end_us.UpdateStat(rel_end_us);
+ detail->mem_used.UpdateStat(mem_used);
+ detail->times_called++;
+}
+
+} // namespace tensorflow
diff --git a/tests/tools/tflite_run/CMakeLists.txt b/tests/tools/tflite_run/CMakeLists.txt
new file mode 100644
index 000000000..49d87318f
--- /dev/null
+++ b/tests/tools/tflite_run/CMakeLists.txt
@@ -0,0 +1,26 @@
+list(APPEND TFLITE_RUN_SRCS "src/tflite_run.cc")
+list(APPEND TFLITE_RUN_SRCS "src/bin_image.cc")
+list(APPEND TFLITE_RUN_SRCS "src/args.cc")
+list(APPEND TFLITE_RUN_SRCS "src/tensor_dumper.cc")
+list(APPEND TFLITE_RUN_SRCS "src/tensor_loader.cc")
+
+add_executable(tflite_run ${TFLITE_RUN_SRCS})
+target_include_directories(tflite_run PRIVATE src)
+target_link_libraries(tflite_run tensorflow-lite ${LIB_PTHREAD} dl nnfw_lib_tflite)
+target_link_libraries(tflite_run boost_program_options boost_system boost_filesystem)
+
+install(TARGETS tflite_run DESTINATION bin)
+
+# TEST BUILD
+nnfw_find_package(GTest)
+
+if(NOT GTest_FOUND)
+ return()
+endif(NOT GTest_FOUND)
+
+## Add test cpp file
+add_executable(tflite_test src/tflite_test.cc)
+## Link test executable against gtest & gtest_main
+target_link_libraries(tflite_test gtest gtest_main ${LIB_PTHREAD})
+## install test binary for packaging
+install(TARGETS tflite_test DESTINATION unittest)
diff --git a/tests/tools/tflite_run/README.md b/tests/tools/tflite_run/README.md
new file mode 100644
index 000000000..35d2b6497
--- /dev/null
+++ b/tests/tools/tflite_run/README.md
@@ -0,0 +1,91 @@
+# tflite_run
+
+A simple TensorFlow Lite runner. It measures the elapsed time and can optionally dump the input/output tensors or verify them.
+
+## Usage
+
+### Simple run
+
+This will run the model with random input data:
+
+```
+$ ./tflite_run model.tflite
+```
+
+Output would look like:
+
+```
+input tensor indices = [0,]
+Input image size is smaller than the size required by the model. Input will not be set.
+output tensor indices = [308(max:984),]
+Prepare takes 0.00126718 seconds
+Invoke takes 7.09527 seconds
+```
+
+### Specifying input feature map
+
+We can specify an input feature map, but the tool only accepts preprocessed data, which means that the image files must be converted first.
+
+TODO : Add input image preprocessing instruction
+
+```
+$ ./tflite_run model.tflite -i binary_input_file
+```
+
+### Dump the input and output tensors
+
+Dump the input and output tensors to a file.
+```
+$ ./tflite_run model.tflite --dump golden
+```
+
+This is usually done for later verification. The tensors are written to a file named "golden".
+
+### Compare with the saved outputs
+
+With the `--compare` option, the results from `tflite_run` are compared against the saved binary file.
+
+```
+$ ls golden
+golden
+$ ./tflite_run model.tflite --compare golden
+```
+
+The output would look like:
+
+```
+input tensor indices = [0,]
+Input image size is smaller than the size required by the model. Input will not be set.
+output tensor indices = [308(max:984),]
+Prepare takes 0.00126718 seconds
+Invoke takes 7.09527 seconds
+========================================
+Comparing the results with "golden".
+========================================
+ Tensor #308: UNMATCHED
+ 1 diffs are detected
+ Max absolute diff at [0, 0]
+ expected: 99
+ obtained: 0.000139008
+ absolute diff: 98.9999
+ Max relative diff at [0, 1007]
+ expected: 7.01825e-33
+ obtained: 0.000139011
+ relative diff: 1
+ (tolerance level = 8.38861e+06)
+```
+
+If the `--compare` option is given, the exit code depends on the comparison result: 0 when the results match, non-zero otherwise.
+
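+This makes it easy to use `tflite_run` from scripts. A minimal sketch, assuming a
+previously dumped `golden` file:
+
+```
+if ./tflite_run model.tflite --compare golden; then
+  echo "outputs match"
+else
+  echo "outputs differ"
+fi
+```
+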
+## How Verification Works
+
+For verification, we may follow these steps (an end-to-end sketch follows the list):
+
+1. Generate and store the verification data (run with option `--dump`)
+ 1. Input Tensor does not matter as we will keep inputs along with outputs
+ 1. Interpreter.Invoke()
+ 1. Dump input tensors and output tensors to a file
+1. Feed the dumped file to the other runtime that we want to verify (run with option `--compare`)
+ 1. Set interpreter's input to input tensor data from the file
+ 1. Interpreter.Invoke()
+ 1. Compare the results with output tensor data from the file
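+
+A hypothetical end-to-end sketch of this flow (the same binary is used in both
+steps here purely for illustration):
+
+```
+# Step 1: run the reference runtime and dump inputs/outputs
+$ ./tflite_run model.tflite --dump golden
+# Step 2: on the runtime under test, replay the stored inputs and compare
+$ ./tflite_run model.tflite --compare golden
+```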
diff --git a/tests/tools/tflite_run/src/args.cc b/tests/tools/tflite_run/src/args.cc
new file mode 100644
index 000000000..713a0a9d2
--- /dev/null
+++ b/tests/tools/tflite_run/src/args.cc
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "args.h"
+
+#include <iostream>
+
+#include <boost/filesystem.hpp>
+
+namespace TFLiteRun
+{
+
+Args::Args(const int argc, char **argv)
+{
+ Initialize();
+ Parse(argc, argv);
+}
+
+void Args::Initialize(void)
+{
+
+ // General options
+ po::options_description general("General options");
+
+ // clang-format off
+ general.add_options()
+ ("help,h", "Display available options")
+ ("dump,d", po::value<std::string>()->default_value(""), "Output filename")
+ ("compare,c", po::value<std::string>()->default_value(""), "filename to be compared with")
+ ("tflite", po::value<std::string>()->required());
+ // clang-format on
+
+ _options.add(general);
+ _positional.add("tflite", 1);
+}
+
+void Args::Parse(const int argc, char **argv)
+{
+ po::variables_map vm;
+ po::store(po::command_line_parser(argc, argv).options(_options).positional(_positional).run(),
+ vm);
+ po::notify(vm);
+
+#if 0 // Enable this when we have mutually conflicting options
+ {
+ auto conflicting_options = [&](const std::string &o1, const std::string &o2) {
+ if ((vm.count(o1) && !vm[o1].defaulted()) && (vm.count(o2) && !vm[o2].defaulted()))
+ {
+ throw boost::program_options::error(std::string("Two options '") + o1 + "' and '" + o2 +
+ "' cannot be given at once.");
+ }
+ };
+
+ conflicting_options("input", "compare");
+ }
+#endif
+
+ if (vm.count("help"))
+ {
+ std::cout << "tflite_run\n\n";
+ std::cout << "Usage: " << argv[0] << " <.tflite> [<options>]\n\n";
+ std::cout << _options;
+ std::cout << "\n";
+
+ exit(0);
+ }
+
+ if (vm.count("dump"))
+ {
+ _dump_filename = vm["dump"].as<std::string>();
+ }
+
+ if (vm.count("compare"))
+ {
+ _compare_filename = vm["compare"].as<std::string>();
+ }
+
+ if (vm.count("tflite"))
+ {
+ _tflite_filename = vm["tflite"].as<std::string>();
+
+ if (_tflite_filename.empty())
+ {
+ // TODO Print usage instead of the below message
+ std::cerr << "Please specify tflite file. Run with `--help` for usage."
+ << "\n";
+
+ exit(1);
+ }
+ else
+ {
+      if (!boost::filesystem::exists(_tflite_filename))
+      {
+        std::cerr << "tflite file not found: " << _tflite_filename << "\n";
+        exit(1);
+      }
+ }
+ }
+}
+
+} // end of namespace TFLiteRun
diff --git a/tests/tools/tflite_run/src/args.h b/tests/tools/tflite_run/src/args.h
new file mode 100644
index 000000000..5561544eb
--- /dev/null
+++ b/tests/tools/tflite_run/src/args.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_RUN_ARGS_H__
+#define __TFLITE_RUN_ARGS_H__
+
+#include <string>
+#include <boost/program_options.hpp>
+
+namespace po = boost::program_options;
+
+namespace TFLiteRun
+{
+
+class Args
+{
+public:
+ Args(const int argc, char **argv);
+ void print(void);
+
+ const std::string &getTFLiteFilename(void) const { return _tflite_filename; }
+ const std::string &getDumpFilename(void) const { return _dump_filename; }
+ const std::string &getCompareFilename(void) const { return _compare_filename; }
+
+private:
+ void Initialize();
+ void Parse(const int argc, char **argv);
+
+private:
+ po::positional_options_description _positional;
+ po::options_description _options;
+
+ std::string _tflite_filename;
+ std::string _dump_filename;
+ std::string _compare_filename;
+};
+
+} // end of namespace TFLiteRun
+
+#endif // __TFLITE_RUN_ARGS_H__
diff --git a/tests/tools/tflite_run/src/bin_image.cc b/tests/tools/tflite_run/src/bin_image.cc
new file mode 100644
index 000000000..16d4c94f7
--- /dev/null
+++ b/tests/tools/tflite_run/src/bin_image.cc
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+#include <fstream>
+
+#include "bin_image.h"
+
+BinImage::BinImage(unsigned int width, unsigned int height, unsigned int channels)
+ : _width(width), _height(height), _channels(channels)
+{
+}
+
+BinImage::~BinImage() {}
+
+void BinImage::loadImage(const std::string &filename)
+{
+ std::ifstream fin(filename);
+
+ if (!fin)
+ {
+ std::cerr << "image filename is not specified. "
+ << "Input image will not be set." << std::endl;
+ return;
+ }
+
+ _image.reserve(_width * _height * _channels);
+
+  // Assumption: the binary image is stored in [H,W,C] order
+ for (unsigned int i = 0; i < _width * _height * _channels; ++i)
+ _image.push_back(fin.get());
+}
+
+void BinImage::AssignTensor(TfLiteTensor *t)
+{
+ float *p = t->data.f;
+ const int IMAGE_MEAN = 128;
+ const float IMAGE_STD = 128.0f;
+
+ // to prevent runtime exception
+ if (_image.size() < _width * _height * _channels)
+ {
+ std::cerr << "Input image size is smaller than the size required by the model."
+ << " Input will not be set." << std::endl;
+ return;
+ }
+
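+  // Normalize each pixel to roughly [-1, 1]: (pixel - IMAGE_MEAN) / IMAGE_STD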
+ for (int x = 0; x < _width; ++x)
+ {
+ for (int y = 0; y < _height; ++y)
+ {
+ for (int c = 0; c < _channels; ++c)
+ {
+ *p++ = (_image[y * _width * _channels + x * _channels + c] - IMAGE_MEAN) / IMAGE_STD;
+ }
+ }
+ }
+}
diff --git a/tests/tools/tflite_run/src/bin_image.h b/tests/tools/tflite_run/src/bin_image.h
new file mode 100644
index 000000000..845011be6
--- /dev/null
+++ b/tests/tools/tflite_run/src/bin_image.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_RUN_BIN_IMAGE_H__
+#define __TFLITE_RUN_BIN_IMAGE_H__
+
+#include <string>
+#include <vector>
+
+#include "tensorflow/contrib/lite/context.h"
+
+class BinImage
+{
+public:
+  BinImage(unsigned int width, unsigned int height, unsigned int channels);
+ ~BinImage();
+
+ void loadImage(const std::string &filename);
+
+ void AssignTensor(TfLiteTensor *t);
+
+private:
+ unsigned int _width;
+ unsigned int _height;
+ unsigned int _channels;
+
+ std::vector<unsigned char> _image;
+};
+
+#endif // __TFLITE_RUN_BIN_IMAGE_H__
diff --git a/tests/tools/tflite_run/src/tensor_dumper.cc b/tests/tools/tflite_run/src/tensor_dumper.cc
new file mode 100644
index 000000000..8568c9b67
--- /dev/null
+++ b/tests/tools/tflite_run/src/tensor_dumper.cc
@@ -0,0 +1,54 @@
+#include "tensor_dumper.h"
+
+#include <fstream>
+#include <iostream>
+#include <cstring>
+
+#include "tensorflow/contrib/lite/interpreter.h"
+
+namespace TFLiteRun
+{
+
+TensorDumper::TensorDumper()
+{
+ // DO NOTHING
+}
+
+void TensorDumper::addTensors(tflite::Interpreter &interpreter, const std::vector<int> &indices)
+{
+ for (const auto &o : indices)
+ {
+ const TfLiteTensor *tensor = interpreter.tensor(o);
+ int size = tensor->bytes;
+ std::vector<char> buffer;
+ buffer.resize(size);
+ memcpy(buffer.data(), tensor->data.raw, size);
+ _tensors.emplace_back(o, std::move(buffer));
+ }
+}
+
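+// Dump file layout:
+//   [uint32_t number of tensors][int index for each tensor][raw bytes of each tensor]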
+void TensorDumper::dump(const std::string &filename) const
+{
+ // TODO Handle file open/write error
+ std::ofstream file(filename, std::ios::out | std::ios::binary);
+
+ // Write number of tensors
+ uint32_t num_tensors = static_cast<uint32_t>(_tensors.size());
+ file.write(reinterpret_cast<const char *>(&num_tensors), sizeof(num_tensors));
+
+ // Write tensor indices
+ for (const auto &t : _tensors)
+ {
+ file.write(reinterpret_cast<const char *>(&t._index), sizeof(int));
+ }
+
+ // Write data
+ for (const auto &t : _tensors)
+ {
+ file.write(t._data.data(), t._data.size());
+ }
+
+ file.close();
+}
+
+} // end of namespace TFLiteRun
diff --git a/tests/tools/tflite_run/src/tensor_dumper.h b/tests/tools/tflite_run/src/tensor_dumper.h
new file mode 100644
index 000000000..2805f1076
--- /dev/null
+++ b/tests/tools/tflite_run/src/tensor_dumper.h
@@ -0,0 +1,38 @@
+#ifndef __TFLITE_RUN_TENSOR_DUMPER_H__
+#define __TFLITE_RUN_TENSOR_DUMPER_H__
+
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace tflite
+{
+class Interpreter;
+}
+
+namespace TFLiteRun
+{
+
+class TensorDumper
+{
+private:
+ struct Tensor
+ {
+ int _index;
+ std::vector<char> _data;
+
+ Tensor(int index, std::vector<char> &&data) : _index(index), _data(std::move(data)) {}
+ };
+
+public:
+ TensorDumper();
+ void addTensors(tflite::Interpreter &interpreter, const std::vector<int> &indices);
+ void dump(const std::string &filename) const;
+
+private:
+ std::vector<Tensor> _tensors;
+};
+
+} // end of namespace TFLiteRun
+
+#endif // __TFLITE_RUN_TENSOR_DUMPER_H__
diff --git a/tests/tools/tflite_run/src/tensor_loader.cc b/tests/tools/tflite_run/src/tensor_loader.cc
new file mode 100644
index 000000000..934b78f40
--- /dev/null
+++ b/tests/tools/tflite_run/src/tensor_loader.cc
@@ -0,0 +1,67 @@
+#include "tensor_loader.h"
+
+#include <assert.h>
+
+#include <fstream>
+
+#include "misc/tensor/Shape.h"
+
+namespace TFLiteRun
+{
+
+TensorLoader::TensorLoader(tflite::Interpreter &interpreter)
+ : _interpreter(interpreter), _raw_data(nullptr)
+{
+}
+
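+// Reads a file written by TensorDumper::dump():
+//   [uint32_t number of tensors][int index for each tensor][raw float data of each tensor]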
+void TensorLoader::load(const std::string &filename)
+{
+ // TODO Handle file open/read error
+ std::ifstream file(filename, std::ios::ate | std::ios::binary);
+ size_t file_size = file.tellg();
+ file.seekg(0, std::ios::beg);
+
+ uint32_t num_tensors = 0;
+ file.read(reinterpret_cast<char *>(&num_tensors), sizeof(num_tensors));
+
+ int tensor_indices_raw[num_tensors];
+ file.read(reinterpret_cast<char *>(tensor_indices_raw), sizeof(tensor_indices_raw));
+ std::vector<int> tensor_indices(tensor_indices_raw, tensor_indices_raw + num_tensors);
+
+  _raw_data = std::unique_ptr<float[]>(new float[file_size]);
+ file.read(reinterpret_cast<char *>(_raw_data.get()), file_size);
+
+ size_t offset = 0;
+ for (const auto &o : tensor_indices)
+ {
+ const TfLiteTensor *tensor = _interpreter.tensor(o);
+
+ // Convert tensor shape to `Shape` from `tensor->dims`
+ nnfw::misc::tensor::Shape shape(static_cast<size_t>(tensor->dims->size));
+ for (int d = 0; d < tensor->dims->size; d++)
+ {
+ shape.dim(d) = tensor->dims->data[d];
+ }
+
+ float *base = _raw_data.get() + offset;
+
+ assert(tensor->bytes % sizeof(float) == 0);
+ offset += (tensor->bytes / sizeof(float));
+
+ _tensor_map.insert(std::make_pair(o, nnfw::tflite::TensorView<float>(shape, base)));
+ }
+
+ // The file size and total output tensor size must match
+ assert(file_size == sizeof(num_tensors) + sizeof(tensor_indices_raw) + offset * sizeof(float));
+
+ file.close();
+}
+
+const nnfw::tflite::TensorView<float> &TensorLoader::get(int tensor_idx) const
+{
+ auto found = _tensor_map.find(tensor_idx);
+ assert(found != _tensor_map.end());
+ return found->second;
+}
+
+} // end of namespace TFLiteRun
diff --git a/tests/tools/tflite_run/src/tensor_loader.h b/tests/tools/tflite_run/src/tensor_loader.h
new file mode 100644
index 000000000..fc4a37a08
--- /dev/null
+++ b/tests/tools/tflite_run/src/tensor_loader.h
@@ -0,0 +1,35 @@
+#ifndef __TFLITE_RUN_TENSOR_LOADER_H__
+#define __TFLITE_RUN_TENSOR_LOADER_H__
+
+#include <sys/mman.h>
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include "tflite/TensorView.h"
+
+namespace tflite
+{
+class Interpreter;
+}
+
+namespace TFLiteRun
+{
+
+class TensorLoader
+{
+public:
+ TensorLoader(tflite::Interpreter &interpreter);
+ void load(const std::string &filename);
+ const nnfw::tflite::TensorView<float> &get(int tensor_idx) const;
+ size_t getNums() const { return _tensor_map.size(); }
+
+private:
+ tflite::Interpreter &_interpreter;
+  std::unique_ptr<float[]> _raw_data;
+ std::unordered_map<int, nnfw::tflite::TensorView<float>> _tensor_map;
+};
+
+} // end of namespace TFLiteRun
+
+#endif // __TFLITE_RUN_TENSOR_LOADER_H__
diff --git a/tests/tools/tflite_run/src/tflite_run.cc b/tests/tools/tflite_run/src/tflite_run.cc
new file mode 100644
index 000000000..5be6909e5
--- /dev/null
+++ b/tests/tools/tflite_run/src/tflite_run.cc
@@ -0,0 +1,262 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tflite/ext/kernels/register.h"
+#include "tensorflow/contrib/lite/model.h"
+
+#include "bin_image.h"
+#include "args.h"
+#include "tensor_dumper.h"
+#include "tensor_loader.h"
+#include "misc/benchmark.h"
+#include "misc/environment.h"
+#include "misc/fp32.h"
+#include "tflite/Diff.h"
+#include "tflite/Assert.h"
+#include "tflite/Session.h"
+#include "tflite/InterpreterSession.h"
+#include "tflite/NNAPISession.h"
+#include "misc/tensor/IndexIterator.h"
+#include "misc/tensor/Object.h"
+
+#include <iostream>
+#include <chrono>
+#include <algorithm>
+
+using namespace tflite;
+using namespace nnfw::tflite;
+using namespace std::placeholders; // for _1, _2 ...
+
+void print_max_idx(float *f, int size)
+{
+ float *p = std::max_element(f, f + size);
+ std::cout << "max:" << p - f;
+}
+
+int main(const int argc, char **argv)
+{
+ bool use_nnapi = false;
+
+ if (std::getenv("USE_NNAPI") != nullptr)
+ {
+ use_nnapi = true;
+ }
+
+ StderrReporter error_reporter;
+
+ TFLiteRun::Args args(argc, argv);
+
+ auto model = FlatBufferModel::BuildFromFile(args.getTFLiteFilename().c_str(), &error_reporter);
+ std::unique_ptr<Interpreter> interpreter;
+
+ std::chrono::milliseconds t_prepare(0);
+ std::chrono::milliseconds t_invoke(0);
+
+ nnfw::misc::benchmark::measure(t_prepare) << [&](void) {
+ BuiltinOpResolver resolver;
+
+ InterpreterBuilder builder(*model, resolver);
+
+ TFLITE_ENSURE(builder(&interpreter))
+
+ interpreter->SetNumThreads(1);
+ };
+
+ std::shared_ptr<nnfw::tflite::Session> sess;
+
+ if (use_nnapi)
+ {
+ sess = std::make_shared<nnfw::tflite::NNAPISession>(interpreter.get());
+ }
+ else
+ {
+ sess = std::make_shared<nnfw::tflite::InterpreterSession>(interpreter.get());
+ }
+
+ sess->prepare();
+
+ TFLiteRun::TensorLoader tensor_loader(*interpreter);
+
+ // Load input from dumped tensor file.
+ if (!args.getCompareFilename().empty())
+ {
+ tensor_loader.load(args.getCompareFilename());
+
+ for (const auto &o : interpreter->inputs())
+ {
+ const auto &tensor_view = tensor_loader.get(o);
+ TfLiteTensor *tensor = interpreter->tensor(o);
+
+ memcpy(reinterpret_cast<void *>(tensor->data.f),
+ reinterpret_cast<const void *>(tensor_view._base), tensor->bytes);
+ }
+ }
+ else
+ {
+ const int seed = 1; /* TODO Add an option for seed value */
+ RandomGenerator randgen{seed, 0.0f, 2.0f};
+
+ // No input specified. So we fill the input tensors with random values.
+ for (const auto &o : interpreter->inputs())
+ {
+ TfLiteTensor *tensor = interpreter->tensor(o);
+ if (tensor->type == kTfLiteInt32)
+ {
+        // Generate signed 32-bit integer (s32) input
+ auto tensor_view = nnfw::tflite::TensorView<int32_t>::make(*interpreter, o);
+
+ int32_t value = 0;
+
+ nnfw::misc::tensor::iterate(tensor_view.shape())
+ << [&](const nnfw::misc::tensor::Index &ind) {
+ // TODO Generate random values
+ // Gather operation: index should be within input coverage.
+ tensor_view.at(ind) = value;
+ value++;
+ };
+ }
+ else if (tensor->type == kTfLiteUInt8)
+ {
+ // Generate unsigned 8-bit integer input
+ auto tensor_view = nnfw::tflite::TensorView<uint8_t>::make(*interpreter, o);
+
+ uint8_t value = 0;
+
+ nnfw::misc::tensor::iterate(tensor_view.shape())
+ << [&](const nnfw::misc::tensor::Index &ind) {
+ // TODO Generate random values
+ tensor_view.at(ind) = value;
+ value = (value + 1) & 0xFF;
+ };
+ }
+ else if (tensor->type == kTfLiteBool)
+ {
+ // Generate bool input
+ auto tensor_view = nnfw::tflite::TensorView<bool>::make(*interpreter, o);
+
+ auto fp = static_cast<bool (RandomGenerator::*)(const ::nnfw::misc::tensor::Shape &,
+ const ::nnfw::misc::tensor::Index &)>(
+ &RandomGenerator::generate<bool>);
+ const nnfw::misc::tensor::Object<bool> data(tensor_view.shape(),
+ std::bind(fp, randgen, _1, _2));
+
+ nnfw::misc::tensor::iterate(tensor_view.shape())
+ << [&](const nnfw::misc::tensor::Index &ind) {
+ const auto value = data.at(ind);
+ tensor_view.at(ind) = value;
+ };
+ }
+ else
+ {
+ assert(tensor->type == kTfLiteFloat32);
+
+ const float *end = reinterpret_cast<const float *>(tensor->data.raw_const + tensor->bytes);
+ for (float *ptr = tensor->data.f; ptr < end; ptr++)
+ {
+ *ptr = randgen.generate<float>();
+ }
+ }
+ }
+ }
+
+ TFLiteRun::TensorDumper tensor_dumper;
+ // Must be called before `interpreter->Invoke()`
+ tensor_dumper.addTensors(*interpreter, interpreter->inputs());
+
+ std::cout << "input tensor indices = [";
+ for (const auto &o : interpreter->inputs())
+ {
+ std::cout << o << ",";
+ }
+ std::cout << "]" << std::endl;
+
+ nnfw::misc::benchmark::measure(t_invoke) << [&sess](void) {
+ if (!sess->run())
+ {
+ assert(0 && "run failed!");
+ }
+ };
+
+ sess->teardown();
+
+ // Must be called after `interpreter->Invoke()`
+ tensor_dumper.addTensors(*interpreter, interpreter->outputs());
+
+ std::cout << "output tensor indices = [";
+ for (const auto &o : interpreter->outputs())
+ {
+ std::cout << o << "(";
+
+ print_max_idx(interpreter->tensor(o)->data.f, interpreter->tensor(o)->bytes / sizeof(float));
+
+ std::cout << "),";
+ }
+ std::cout << "]" << std::endl;
+
+ std::cout << "Prepare takes " << t_prepare.count() / 1000.0 << " seconds" << std::endl;
+ std::cout << "Invoke takes " << t_invoke.count() / 1000.0 << " seconds" << std::endl;
+
+ if (!args.getDumpFilename().empty())
+ {
+ const std::string &dump_filename = args.getDumpFilename();
+ tensor_dumper.dump(dump_filename);
+ std::cout << "Input/output tensors have been dumped to file \"" << dump_filename << "\"."
+ << std::endl;
+ }
+
+ if (!args.getCompareFilename().empty())
+ {
+ const std::string &compare_filename = args.getCompareFilename();
+ std::cout << "========================================" << std::endl;
+ std::cout << "Comparing the results with \"" << compare_filename << "\"." << std::endl;
+ std::cout << "========================================" << std::endl;
+
+ // TODO Code duplication (copied from RandomTestRunner)
+
+ int tolerance = 1;
+ nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
+
+ auto equals = [tolerance](float lhs, float rhs) {
+ // NOTE Hybrid approach
+ // TODO Allow users to set tolerance for absolute_epsilon_equal
+ if (nnfw::misc::fp32::absolute_epsilon_equal(lhs, rhs))
+ {
+ return true;
+ }
+
+ return nnfw::misc::fp32::epsilon_equal(lhs, rhs, tolerance);
+ };
+
+ nnfw::misc::tensor::Comparator comparator(equals);
+ TfLiteInterpMatchApp app(comparator);
+ bool res = true;
+
+ for (const auto &o : interpreter->outputs())
+ {
+ auto expected = tensor_loader.get(o);
+ auto obtained = nnfw::tflite::TensorView<float>::make(*interpreter, o);
+
+ res = res && app.compareSingleTensorView(expected, obtained, o);
+ }
+
+ if (!res)
+ {
+ return 255;
+ }
+ }
+
+ return 0;
+}
diff --git a/tests/tools/tflite_run/src/tflite_test.cc b/tests/tools/tflite_run/src/tflite_test.cc
new file mode 100644
index 000000000..d0d36c229
--- /dev/null
+++ b/tests/tools/tflite_run/src/tflite_test.cc
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gtest/gtest.h"
+
+TEST(TFLite_test_case, simple_test) { EXPECT_EQ(1, 1); }