Diffstat (limited to 'tests/tools')
23 files changed, 2278 insertions, 0 deletions
diff --git a/tests/tools/CMakeLists.txt b/tests/tools/CMakeLists.txt
new file mode 100644
index 000000000..b1eea12f9
--- /dev/null
+++ b/tests/tools/CMakeLists.txt
@@ -0,0 +1,8 @@
+set(EXCLUDE_DIR "")
+
+if(OBS_BUILD)
+  list(APPEND EXCLUDE_DIR tflite_benchmark_model)
+  list(APPEND EXCLUDE_DIR tflite_run)
+endif(OBS_BUILD)
+
+add_subdirectories(EXCLUDES ${EXCLUDE_DIR})
diff --git a/tests/tools/nnapi_test/CMakeLists.txt b/tests/tools/nnapi_test/CMakeLists.txt
new file mode 100644
index 000000000..b52f4f34b
--- /dev/null
+++ b/tests/tools/nnapi_test/CMakeLists.txt
@@ -0,0 +1,5 @@
+list(APPEND SOURCES "src/nnapi_test.cc")
+
+add_executable(nnapi_test ${SOURCES})
+target_link_libraries(nnapi_test nnfw_lib_tflite)
+install(TARGETS nnapi_test DESTINATION bin)
diff --git a/tests/tools/nnapi_test/src/nnapi_test.cc b/tests/tools/nnapi_test/src/nnapi_test.cc
new file mode 100644
index 000000000..73e80f01f
--- /dev/null
+++ b/tests/tools/nnapi_test/src/nnapi_test.cc
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tflite/ext/kernels/register.h"
+#include "tensorflow/contrib/lite/model.h"
+
+#include "tflite/interp/FlatBufferBuilder.h"
+#include "tflite/Diff.h"
+
+#include <iostream>
+#include <stdexcept>
+
+using namespace tflite;
+using namespace nnfw::tflite;
+
+int main(const int argc, char **argv)
+{
+  if (argc < 2)
+  {
+    std::cerr << "nnapi_test\n\n";
+    std::cerr << "Usage: " << argv[0] << " <.tflite>\n\n";
+    return 1;
+  }
+
+  const auto filename = argv[1];
+
+  StderrReporter error_reporter;
+
+  auto model = FlatBufferModel::BuildFromFile(filename, &error_reporter);
+  if (model == nullptr)
+  {
+    // Guard against a missing or unreadable model file before dereferencing it
+    std::cerr << "Cannot create model" << std::endl;
+    return 1;
+  }
+
+  const nnfw::tflite::FlatBufferBuilder builder(*model);
+
+  try
+  {
+    return RandomTestRunner::make(0).run(builder);
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << e.what() << std::endl;
+    return 1;
+  }
+}
diff --git a/tests/tools/tflite_benchmark/CMakeLists.txt b/tests/tools/tflite_benchmark/CMakeLists.txt
new file mode 100644
index 000000000..56421a294
--- /dev/null
+++ b/tests/tools/tflite_benchmark/CMakeLists.txt
@@ -0,0 +1,5 @@
+list(APPEND SOURCES "src/tflite_benchmark.cc")
+
+add_executable(tflite_benchmark ${SOURCES})
+target_link_libraries(tflite_benchmark nnfw_lib_tflite tensorflow-lite ${LIB_PTHREAD} dl nnfw_lib_misc)
+install(TARGETS tflite_benchmark DESTINATION bin)
diff --git a/tests/tools/tflite_benchmark/src/tflite_benchmark.cc b/tests/tools/tflite_benchmark/src/tflite_benchmark.cc
new file mode 100644
index 000000000..b77afc189
--- /dev/null
+++ b/tests/tools/tflite_benchmark/src/tflite_benchmark.cc
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "tflite/ext/kernels/register.h" +#include "tensorflow/contrib/lite/model.h" + +#include "tflite/Assert.h" +#include "tflite/Session.h" +#include "tflite/InterpreterSession.h" +#include "tflite/NNAPISession.h" +#include "tflite/Diff.h" +#include "misc/tensor/IndexIterator.h" + +#include <boost/accumulators/accumulators.hpp> +#include <boost/accumulators/statistics/stats.hpp> +#include <boost/accumulators/statistics/min.hpp> +#include <boost/accumulators/statistics/max.hpp> +#include <boost/accumulators/statistics/mean.hpp> + +#include <iostream> + +#include "misc/environment.h" +#include "misc/benchmark.h" + +using namespace tflite; +using namespace nnfw::tflite; + +void help(std::ostream &out, const int argc, char **argv) +{ + std::string cmd = argv[0]; + auto pos = cmd.find_last_of("/"); + if (pos != std::string::npos) + cmd = cmd.substr(pos + 1); + + out << "use:" << std::endl << cmd << " <model file name>" << std::endl; +} + +bool checkParams(const int argc, char **argv) +{ + if (argc < 2) + { + help(std::cerr, argc, argv); + return false; + } + return true; +} + +int main(const int argc, char **argv) +{ + + if (!checkParams(argc, argv)) + { + return -1; + } + + const auto filename = argv[1]; + + const bool use_nnapi = nnfw::misc::get_env_bool("USE_NNAPI"); + const auto thread_count = nnfw::misc::get_env_int("THREAD", -1); + + std::cout << "Num threads: " << thread_count << std::endl; + if (use_nnapi) + { + std::cout << "Use NNAPI" << std::endl; + } + + StderrReporter error_reporter; + + auto model = FlatBufferModel::BuildFromFile(filename, &error_reporter); + if (model == nullptr) + { + std::cerr << "Cannot create model" << std::endl; + return -1; + } + + BuiltinOpResolver resolver; + + InterpreterBuilder builder(*model, resolver); + + std::unique_ptr<Interpreter> interpreter; + + try + { + TFLITE_ENSURE(builder(&interpreter)); + } + catch (const std::exception &e) + { + std::cerr << e.what() << std::endl; + return 1; + } + + // Show inputs + for (uint32_t n = 0; n < interpreter->inputs().size(); ++n) + { + // TODO Print shape + auto tensor_id = interpreter->inputs().at(n); + auto tensor_ptr = interpreter->tensor(tensor_id); + + std::cout << "Input #" << n << ":" << std::endl; + std::cout << " Name: " << tensor_ptr->name << std::endl; + } + + // Show outputs + for (uint32_t n = 0; n < interpreter->outputs().size(); ++n) + { + // TODO Print shape + auto tensor_id = interpreter->outputs().at(n); + auto tensor_ptr = interpreter->tensor(tensor_id); + + std::cout << "Output #" << n << ":" << std::endl; + std::cout << " Name: " << tensor_ptr->name << std::endl; + } + + interpreter->SetNumThreads(thread_count); + + std::shared_ptr<nnfw::tflite::Session> sess; + + if (use_nnapi) + { + sess = std::make_shared<nnfw::tflite::NNAPISession>(interpreter.get()); + } + else + { + sess = std::make_shared<nnfw::tflite::InterpreterSession>(interpreter.get()); + } + + // + // Warming-up + // + for (uint32_t n = 0; n < 3; ++n) + { + std::chrono::milliseconds elapsed(0); + + sess->prepare(); + + for (const auto &id : interpreter->inputs()) + { + TfLiteTensor *tensor = 
interpreter->tensor(id);
+      if (tensor->type == kTfLiteInt32)
+      {
+        // Generate signed 32-bit integer (s32) input
+        auto tensor_view = nnfw::tflite::TensorView<int32_t>::make(*interpreter, id);
+
+        int32_t value = 0;
+
+        nnfw::misc::tensor::iterate(tensor_view.shape())
+            << [&](const nnfw::misc::tensor::Index &ind) {
+                 // TODO Generate random values
+                 // Gather operation: index should be within input coverage.
+                 tensor_view.at(ind) = value;
+                 value++;
+               };
+      }
+      else if (tensor->type == kTfLiteUInt8)
+      {
+        // Generate unsigned 8-bit integer input
+        auto tensor_view = nnfw::tflite::TensorView<uint8_t>::make(*interpreter, id);
+
+        uint8_t value = 0;
+
+        nnfw::misc::tensor::iterate(tensor_view.shape())
+            << [&](const nnfw::misc::tensor::Index &ind) {
+                 // TODO Generate random values
+                 tensor_view.at(ind) = value;
+                 value = (value + 1) & 0xFF;
+               };
+      }
+      else
+      {
+        assert(tensor->type == kTfLiteFloat32);
+
+        const int seed = 1; /* TODO Add an option for seed value */
+        RandomGenerator randgen{seed, 0.0f, 0.2f};
+        const float *end = reinterpret_cast<const float *>(tensor->data.raw_const + tensor->bytes);
+        for (float *ptr = tensor->data.f; ptr < end; ptr++)
+        {
+          *ptr = randgen.generate<float>();
+        }
+      }
+    }
+
+    nnfw::misc::benchmark::measure(elapsed) << [&](void) {
+      if (!sess->run())
+      {
+        assert(0 && "run failed");
+      }
+    };
+    sess->teardown();
+
+    std::cout << "Warming-up " << n << ": " << elapsed.count() << "ms" << std::endl;
+  }
+
+  //
+  // Measure
+  //
+  const auto cnt = nnfw::misc::get_env_int("COUNT", 1);
+
+  using namespace boost::accumulators;
+
+  accumulator_set<double, stats<tag::mean, tag::min, tag::max>> acc;
+
+  for (int n = 0; n < cnt; ++n)
+  {
+    std::chrono::milliseconds elapsed(0);
+
+    sess->prepare();
+    nnfw::misc::benchmark::measure(elapsed) << [&](void) {
+      if (!sess->run())
+      {
+        assert(0 && "run failed");
+      }
+    };
+    sess->teardown();
+
+    acc(elapsed.count());
+
+    std::cout << "Iteration " << n << ": " << elapsed.count() << "ms" << std::endl;
+  }
+
+  std::cout << "--------" << std::endl;
+  std::cout << "Min: " << min(acc) << "ms" << std::endl;
+  std::cout << "Max: " << max(acc) << "ms" << std::endl;
+  std::cout << "Mean: " << mean(acc) << "ms" << std::endl;
+
+  return 0;
+}
diff --git a/tests/tools/tflite_benchmark_model/.FORMATDENY b/tests/tools/tflite_benchmark_model/.FORMATDENY
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tests/tools/tflite_benchmark_model/.FORMATDENY
diff --git a/tests/tools/tflite_benchmark_model/CMakeLists.txt b/tests/tools/tflite_benchmark_model/CMakeLists.txt
new file mode 100644
index 000000000..c48f658c1
--- /dev/null
+++ b/tests/tools/tflite_benchmark_model/CMakeLists.txt
@@ -0,0 +1,21 @@
+if (NOT BUILD_TFLITE_BENCHMARK_MODEL)
+  return()
+endif(NOT BUILD_TFLITE_BENCHMARK_MODEL)
+
+file(GLOB_RECURSE SOURCES "*.cc")
+
+nnfw_find_package(TensorFlowSource REQUIRED)
+set(TENSORFLOW_LITE_BASE "${TensorFlowSource_DIR}/tensorflow/contrib/lite")
+list(APPEND SOURCES "${TENSORFLOW_LITE_BASE}/tools/benchmark/benchmark_main.cc"
+                    "${TENSORFLOW_LITE_BASE}/tools/benchmark/benchmark_model.cc"
+                    "${TENSORFLOW_LITE_BASE}/tools/benchmark/benchmark_params.cc"
+                    "${TENSORFLOW_LITE_BASE}/tools/benchmark/command_line_flags.cc")
+
+add_executable(tflite_benchmark_model ${SOURCES})
+target_compile_definitions(tflite_benchmark_model PUBLIC "TFLITE_PROFILING_ENABLED")
+target_link_libraries(tflite_benchmark_model nnfw_lib_misc nnfw_lib_tflite nnfw_lib_profiling)
+target_link_libraries(tflite_benchmark_model tensorflow-lite ${LIB_PTHREAD} dl)
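+# Note: TFLITE_PROFILING_ENABLED is defined PUBLIC above so that the vendored
+# benchmark sources are built with operator profiling compiled in (the
+# in-tree equivalent of bazel's --copt=-DTFLITE_PROFILING_ENABLED).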
+install(TARGETS tflite_benchmark_model DESTINATION bin)
diff --git a/tests/tools/tflite_benchmark_model/README.md b/tests/tools/tflite_benchmark_model/README.md
new file mode 100644
index 000000000..8d997639f
--- /dev/null
+++ b/tests/tools/tflite_benchmark_model/README.md
@@ -0,0 +1,197 @@
+# TFLite Model Benchmark Tool
+
+## Description
+
+A simple C++ binary to benchmark a TFLite model and its individual operators,
+both on desktop machines and on Android. The binary takes a TFLite model,
+generates random inputs, and then repeatedly runs the model for a specified
+number of runs. Aggregate latency statistics are reported after running the
+benchmark.
+
+The instructions below are for running the binary on desktop and Android;
+for iOS, please use the
+[iOS benchmark app](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tools/benchmark/ios).
+
+## Parameters
+
+The binary takes the following required parameters:
+
+* `graph`: `string` \
+    The path to the TFLite model file.
+
+and the following optional parameters:
+
+* `num_threads`: `int` (default=1) \
+    The number of threads to use for running the TFLite interpreter.
+* `warmup_runs`: `int` (default=1) \
+    The number of warmup runs to do before starting the benchmark.
+* `num_runs`: `int` (default=50) \
+    The number of runs. Increase this to reduce variance.
+* `run_delay`: `float` (default=-1.0) \
+    The delay in seconds between subsequent benchmark runs. Non-positive values
+    mean use no delay.
+* `use_nnapi`: `bool` (default=false) \
+    Whether to use [Android NNAPI](https://developer.android.com/ndk/guides/neuralnetworks/).
+    This API is available on recent Android devices.
+
+## To build/install/run
+
+### On Android:
+
+(0) Refer to https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android to edit the `WORKSPACE` to configure the Android NDK/SDK.
+
+(1) Build for your specific platform, e.g.:
+
+```
+bazel build -c opt \
+  --config=android_arm \
+  --cxxopt='--std=c++11' \
+  tensorflow/contrib/lite/tools/benchmark:benchmark_model
+```
+
+(2) Connect your phone. Push the binary to your phone with adb push
+    (make the directory if required):
+
+```
+adb push bazel-bin/tensorflow/contrib/lite/tools/benchmark/benchmark_model /data/local/tmp
+```
+
+(3) Make the binary executable.
+
+```
+adb shell chmod +x /data/local/tmp/benchmark_model
+```
+
+(4) Push the compute graph that you need to test. For example:
+
+```
+adb push mobilenet_quant_v1_224.tflite /data/local/tmp
+```
+
+(5) Run the benchmark. For example:
+
+```
+adb shell /data/local/tmp/benchmark_model \
+  --graph=/data/local/tmp/mobilenet_quant_v1_224.tflite \
+  --num_threads=4
+```
+
+### On desktop:
+(1) Build the binary.
+
+```
+bazel build -c opt tensorflow/contrib/lite/tools/benchmark:benchmark_model
+```
+
+(2) Run on your compute graph, similar to the Android case but without the need for `adb shell`.
+For example:
+
+```
+bazel-bin/tensorflow/contrib/lite/tools/benchmark/benchmark_model \
+  --graph=mobilenet_quant_v1_224.tflite \
+  --num_threads=4
+```
+
+The MobileNet graph used as an example here may be downloaded from [here](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip).
+
+
+## Reducing variance between runs on Android
+
+Most modern Android phones use the [ARM big.LITTLE](https://en.wikipedia.org/wiki/ARM_big.LITTLE)
+architecture, where some cores are more power-hungry but faster than other cores.
+When running benchmarks on these phones, there can be significant variance
+between different runs of the benchmark. One way to reduce this variance is to
+set the [CPU affinity](https://en.wikipedia.org/wiki/Processor_affinity)
+before running the benchmark. On Android this can be done using the `taskset`
+command.
+For example, to run the benchmark on the big cores of a Pixel 2 with a single
+thread, use the following command:
+
+```
+adb shell taskset f0 /data/local/tmp/benchmark_model \
+  --graph=/data/local/tmp/mobilenet_quant_v1_224.tflite \
+  --num_threads=1
+```
+
+where `f0` is the affinity mask for big cores on Pixel 2.
+Note: the affinity mask varies with the device.
+
+## Profiling model operators
+The benchmark binary also allows you to profile operators and reports the
+execution time of each operator. To do this, compile the binary with profiling
+support by passing **--copt=-DTFLITE_PROFILING_ENABLED**.
+For example, to compile with profiling support on Android, add this flag to the previous command:
+
+```
+bazel build -c opt \
+  --config=android_arm \
+  --cxxopt='--std=c++11' \
+  --copt=-DTFLITE_PROFILING_ENABLED \
+  tensorflow/contrib/lite/tools/benchmark:benchmark_model
+```
+This compiles TFLite with profiling enabled; you can then run the benchmark
+binary as before. It will produce detailed statistics for each operation,
+similar to those shown below:
+
+```
+
+============================== Run Order ==============================
+ [node type] [start] [first] [avg ms] [%] [cdf%] [mem KB] [times called] [Name]
+ CONV_2D 0.000 4.269 4.269 0.107% 0.107% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_0/Relu6]
+ DEPTHWISE_CONV_2D 4.270 2.150 2.150 0.054% 0.161% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_1_depthwise/Relu6]
+ CONV_2D 6.421 6.107 6.107 0.153% 0.314% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_1_pointwise/Relu6]
+ DEPTHWISE_CONV_2D 12.528 1.366 1.366 0.034% 0.348% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_2_depthwise/Relu6]
+ CONV_2D 13.895 4.195 4.195 0.105% 0.454% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_2_pointwise/Relu6]
+ DEPTHWISE_CONV_2D 18.091 1.260 1.260 0.032% 0.485% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_3_depthwise/Relu6]
+ CONV_2D 19.352 6.652 6.652 0.167% 0.652% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_3_pointwise/Relu6]
+ DEPTHWISE_CONV_2D 26.005 0.698 0.698 0.018% 0.670% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_4_depthwise/Relu6]
+ CONV_2D 26.703 3.344 3.344 0.084% 0.754% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_4_pointwise/Relu6]
+ DEPTHWISE_CONV_2D 30.047 0.646 0.646 0.016% 0.770% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_5_depthwise/Relu6]
+ CONV_2D 30.694 5.800 5.800 0.145% 0.915% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_5_pointwise/Relu6]
+ DEPTHWISE_CONV_2D 36.495 0.331 0.331 0.008% 0.924% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_6_depthwise/Relu6]
+ CONV_2D 36.826 2.838 2.838 0.071% 0.995% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_6_pointwise/Relu6]
+ DEPTHWISE_CONV_2D 39.665 0.439 0.439 0.011% 1.006% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_7_depthwise/Relu6]
+ CONV_2D 40.105 5.293 5.293 0.133% 1.139% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_7_pointwise/Relu6]
+ DEPTHWISE_CONV_2D 45.399 0.352 0.352 0.009% 1.147% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_8_depthwise/Relu6]
+ CONV_2D 45.752 5.322 5.322 0.133% 1.281% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_8_pointwise/Relu6]
+ DEPTHWISE_CONV_2D 51.075 0.357 0.357 0.009% 1.290% 0.000 0
[MobilenetV1/MobilenetV1/Conv2d_9_depthwise/Relu6] + CONV_2D 51.432 5.693 5.693 0.143% 1.433% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_9_pointwise/Relu6] + DEPTHWISE_CONV_2D 57.126 0.366 0.366 0.009% 1.442% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_10_depthwise/Relu6] + CONV_2D 57.493 5.472 5.472 0.137% 1.579% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_10_pointwise/Relu6] + DEPTHWISE_CONV_2D 62.966 0.364 0.364 0.009% 1.588% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_11_depthwise/Relu6] + CONV_2D 63.330 5.404 5.404 0.136% 1.724% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_11_pointwise/Relu6] + DEPTHWISE_CONV_2D 68.735 0.155 0.155 0.004% 1.728% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_12_depthwise/Relu6] + CONV_2D 68.891 2.970 2.970 0.074% 1.802% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_12_pointwise/Relu6] + DEPTHWISE_CONV_2D 71.862 0.206 0.206 0.005% 1.807% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_13_depthwise/Relu6] + CONV_2D 72.069 5.888 5.888 0.148% 1.955% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_13_pointwise/Relu6] + AVERAGE_POOL_2D 77.958 0.036 0.036 0.001% 1.956% 0.000 0 [MobilenetV1/Logits/AvgPool_1a/AvgPool] + CONV_2D 77.994 1.445 1.445 0.036% 1.992% 0.000 0 [MobilenetV1/Logits/Conv2d_1c_1x1/BiasAdd] + RESHAPE 79.440 0.002 0.002 0.000% 1.992% 0.000 0 [MobilenetV1/Predictions/Reshape] + SOFTMAX 79.443 0.029 0.029 0.001% 1.993% 0.000 0 [MobilenetV1/Predictions/Softmax] + +============================== Top by Computation Time ============================== + [node type] [start] [first] [avg ms] [%] [cdf%] [mem KB] [times called] [Name] + CONV_2D 19.352 6.652 6.652 0.167% 0.167% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_3_pointwise/Relu6] + CONV_2D 6.421 6.107 6.107 0.153% 0.320% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_1_pointwise/Relu6] + CONV_2D 72.069 5.888 5.888 0.148% 0.468% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_13_pointwise/Relu6] + CONV_2D 30.694 5.800 5.800 0.145% 0.613% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_5_pointwise/Relu6] + CONV_2D 51.432 5.693 5.693 0.143% 0.756% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_9_pointwise/Relu6] + CONV_2D 57.493 5.472 5.472 0.137% 0.893% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_10_pointwise/Relu6] + CONV_2D 63.330 5.404 5.404 0.136% 1.029% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_11_pointwise/Relu6] + CONV_2D 45.752 5.322 5.322 0.133% 1.162% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_8_pointwise/Relu6] + CONV_2D 40.105 5.293 5.293 0.133% 1.295% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_7_pointwise/Relu6] + CONV_2D 0.000 4.269 4.269 0.107% 1.402% 0.000 0 [MobilenetV1/MobilenetV1/Conv2d_0/Relu6] + +Number of nodes executed: 31 +============================== Summary by node type ============================== + [Node type] [count] [avg ms] [avg %] [cdf %] [mem KB] [times called] + CONV_2D 15 1.406 89.270% 89.270% 0.000 0 + DEPTHWISE_CONV_2D 13 0.169 10.730% 100.000% 0.000 0 + SOFTMAX 1 0.000 0.000% 100.000% 0.000 0 + RESHAPE 1 0.000 0.000% 100.000% 0.000 0 + AVERAGE_POOL_2D 1 0.000 0.000% 100.000% 0.000 0 + +Timings (microseconds): count=50 first=79449 curr=81350 min=77385 max=88213 avg=79732 std=1929 +Memory (bytes): count=0 +31 nodes observed + + +Average inference timings in us: Warmup: 83235, Init: 38467, no stats: 79760.9 +``` diff --git a/tests/tools/tflite_benchmark_model/benchmark_tflite_model.cc b/tests/tools/tflite_benchmark_model/benchmark_tflite_model.cc new file mode 100644 index 000000000..efc8bae52 --- /dev/null +++ b/tests/tools/tflite_benchmark_model/benchmark_tflite_model.cc @@ -0,0 +1,401 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h" + +#include <cstdarg> +#include <cstdlib> +#include <iostream> +#include <memory> +#include <string> +#include <unordered_set> +#include <vector> + +#ifdef TFLITE_FLEX +#include "tensorflow/contrib/lite/delegates/flex/delegate.h" +#endif // TFLITE_FLEX +#include "tflite/ext/kernels/register.h" +#include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/op_resolver.h" +#include "tensorflow/contrib/lite/string_util.h" +#include "tensorflow/contrib/lite/tools/benchmark/logging.h" + +// For profiling nnapi_delegate +#include "profiling/profiling.h" +#include "tflite/ext/nnapi_delegate.h" + +namespace { + nnfw::tflite::NNAPIDelegate nnfw_delegate_; +} + +#ifdef TFLITE_CUSTOM_OPS_HEADER +void RegisterSelectedOps(::tflite::MutableOpResolver* resolver); +#endif + +namespace tflite { +namespace benchmark { + +void ProfilingListener::SetInterpreter(tflite::Interpreter* interpreter) { + TFLITE_BENCHMARK_CHECK(interpreter); + interpreter_ = interpreter; + interpreter_->SetProfiler(&profiler_); +} + +void ProfilingListener::OnSingleRunStart(RunType run_type) { + if (run_type == REGULAR) { + profiler_.Reset(); + profiler_.StartProfiling(); + } +} + +void ProfilingListener::OnBenchmarkEnd(const BenchmarkResults& results) { + if (has_profiles_) { + TFLITE_LOG(INFO) << summarizer_.GetOutputString(); + } +} + +void ProfilingListener::OnSingleRunEnd() { + profiler_.StopProfiling(); + auto profile_events = profiler_.GetProfileEvents(); + has_profiles_ = !profile_events.empty(); + summarizer_.ProcessProfiles(profile_events, *interpreter_); +} + +namespace { + +std::vector<std::string> Split(const std::string& str, const char delim) { + std::istringstream input(str); + std::vector<std::string> results; + std::string item; + while (std::getline(input, item, delim)) { + results.push_back(item); + } + return results; +} + +template <typename T> +bool SplitAndParse(const std::string& str, char delim, std::vector<T>* values) { + std::istringstream input(str); + bool first = true; + while (!input.eof()) { + if (!first) { + char c; + input >> c; + if (c != delim) { + return false; + } + } else { + first = false; + } + T val; + input >> val; + if (!input.eof() && 
!input.good()) { + return false; + } + values->push_back(val); + } + return true; +} + +template <typename T> +void FillRandomValue(T* ptr, const std::vector<int>& sizes, + const std::function<T()>& random_func) { + int num_elements = 1; + for (int dim : sizes) { + num_elements *= dim; + } + for (int i = 0; i < num_elements; ++i) { + *ptr++ = random_func(); + } +} + +void FillRandomString(tflite::DynamicBuffer* buffer, + const std::vector<int>& sizes, + const std::function<string()>& random_func) { + int num_elements = 1; + for (int dim : sizes) { + num_elements *= dim; + } + for (int i = 0; i < num_elements; ++i) { + auto str = random_func(); + buffer->AddString(str.data(), str.length()); + } +} + +bool PopulateInputLayerInfo( + const string& names_string, const string& shapes_string, + std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info) { + std::vector<std::string> names = Split(names_string, ','); + std::vector<std::string> shapes = Split(shapes_string, ':'); + + if (names.size() != shapes.size()) { + TFLITE_LOG(ERROR) << "The number of items in" + << " --input_layer_shape (" << shapes_string << ", with " + << shapes.size() << " items)" + << " must match the number of items in" + << " --input_layer (" << names_string << ", with " + << names.size() << " items)." + << " For example --input_layer=input1,input2" + << " --input_layer_shape=1,224,224,4:1,20"; + return false; + } + + for (int i = 0; i < names.size(); ++i) { + info->push_back(BenchmarkTfLiteModel::InputLayerInfo()); + BenchmarkTfLiteModel::InputLayerInfo& input = info->back(); + + input.name = names[i]; + + TFLITE_BENCHMARK_CHECK(SplitAndParse(shapes[i], ',', &input.shape)) + << "Incorrect size string specified: " << shapes[i]; + for (int dim : input.shape) { + if (dim == -1) { + TFLITE_LOG(ERROR) + << "Any unknown sizes in the shapes (-1's) must be replaced" + << " with the size you want to benchmark with."; + return false; + } + } + } + + return true; +} + +BenchmarkParams GetDefaultParams() { + BenchmarkParams default_params = BenchmarkModel::DefaultParams(); + default_params.AddParam("graph", BenchmarkParam::Create<std::string>("")); + default_params.AddParam("input_layer", + BenchmarkParam::Create<std::string>("")); + default_params.AddParam("input_layer_shape", + BenchmarkParam::Create<std::string>("")); + default_params.AddParam("use_nnapi", BenchmarkParam::Create<bool>(false)); + return default_params; +} + +} // namespace + +BenchmarkTfLiteModel::BenchmarkTfLiteModel() + : BenchmarkModel(GetDefaultParams()) { + AddListener(&profiling_listener_); +} + +BenchmarkTfLiteModel::BenchmarkTfLiteModel(BenchmarkParams params) + : BenchmarkModel(std::move(params)) { + AddListener(&profiling_listener_); +} + +std::vector<Flag> BenchmarkTfLiteModel::GetFlags() { + std::vector<Flag> flags = BenchmarkTfLiteModel::BenchmarkModel::GetFlags(); + std::vector<Flag> specific_flags = { + CreateFlag<std::string>("graph", ¶ms_, "graph file name"), + CreateFlag<std::string>("input_layer", ¶ms_, "input layer names"), + CreateFlag<std::string>("input_layer_shape", ¶ms_, + "input layer shape"), + CreateFlag<bool>("use_nnapi", ¶ms_, "use nnapi api")}; + + flags.insert(flags.end(), specific_flags.begin(), specific_flags.end()); + return flags; +} + +void BenchmarkTfLiteModel::LogParams() { + BenchmarkModel::LogParams(); + TFLITE_LOG(INFO) << "Graph: [" << params_.Get<std::string>("graph") << "]"; + TFLITE_LOG(INFO) << "Input layers: [" + << params_.Get<std::string>("input_layer") << "]"; + TFLITE_LOG(INFO) << "Input shapes: [" + << 
params_.Get<std::string>("input_layer_shape") << "]"; + TFLITE_LOG(INFO) << "Use nnapi : [" << params_.Get<bool>("use_nnapi") << "]"; +} + +bool BenchmarkTfLiteModel::ValidateParams() { + if (params_.Get<std::string>("graph").empty()) { + TFLITE_LOG(ERROR) + << "Please specify the name of your TF Lite input file with --graph"; + return false; + } + return PopulateInputLayerInfo(params_.Get<std::string>("input_layer"), + params_.Get<std::string>("input_layer_shape"), + &inputs); +} + +uint64_t BenchmarkTfLiteModel::ComputeInputBytes() { + TFLITE_BENCHMARK_CHECK(interpreter); + uint64_t total_input_bytes = 0; + for (int input : interpreter->inputs()) { + auto* t = interpreter->tensor(input); + total_input_bytes += t->bytes; + } + return total_input_bytes; +} + +void BenchmarkTfLiteModel::PrepareInputsAndOutputs() { + auto interpreter_inputs = interpreter->inputs(); + // Set the values of the input tensors. + for (int j = 0; j < inputs.size(); ++j) { + const InputLayerInfo& input = inputs[j]; + int i = interpreter_inputs[j]; + TfLiteTensor* t = interpreter->tensor(i); + std::vector<int> sizes = input.shape; + + // TODO(ahentz): below we ignore the O-th dimension (number of batches). + if (t->type == kTfLiteFloat32) { + FillRandomValue<float>( + interpreter->typed_tensor<float>(i), + std::vector<int>(sizes.begin() + 1, sizes.end()), + []() { return static_cast<float>(rand()) / RAND_MAX - 0.5f; }); + } else if (t->type == kTfLiteInt32) { + // TODO(yunluli): This is currently only used for handling embedding input + // for speech models. Generalize if necessary. + FillRandomValue<int32_t>( + interpreter->typed_tensor<int32_t>(i), + std::vector<int32_t>(sizes.begin() + 1, sizes.end()), + []() { return static_cast<int32_t>(rand()) % 100; }); + } else if (t->type == kTfLiteUInt8) { + FillRandomValue<uint8_t>( + interpreter->typed_tensor<uint8_t>(i), + std::vector<int>(sizes.begin() + 1, sizes.end()), + []() { return static_cast<uint8_t>(rand()) % 255; }); + } else if (t->type == kTfLiteString) { + tflite::DynamicBuffer buffer; + FillRandomString(&buffer, sizes, []() { + return "we're have some friends over saturday to hang out in the yard"; + }); + buffer.WriteToTensor(interpreter->tensor(i)); + } else { + TFLITE_LOG(FATAL) << "Don't know how to populate tensor " << t->name + << " of type " << t->type; + } + } +} + +void BenchmarkTfLiteModel::Init() { + std::string graph = params_.Get<std::string>("graph"); + model = tflite::FlatBufferModel::BuildFromFile(graph.c_str()); + if (!model) { + TFLITE_LOG(FATAL) << "Failed to mmap model " << graph; + } + TFLITE_LOG(INFO) << "Loaded model " << graph; + model->error_reporter(); + TFLITE_LOG(INFO) << "resolved reporter"; + +#ifdef TFLITE_CUSTOM_OPS_HEADER + tflite::MutableOpResolver resolver; + RegisterSelectedOps(&resolver); +#else + nnfw::tflite::BuiltinOpResolver resolver; +#endif + + tflite::InterpreterBuilder(*model, resolver)(&interpreter); + if (!interpreter) { + TFLITE_LOG(FATAL) << "Failed to construct interpreter"; + } + profiling_listener_.SetInterpreter(interpreter.get()); + ::profiling::Context::get().setProfiler(interpreter->GetProfiler()); + + auto enable_sync = std::getenv("PROFILING_OP_SYNC"); + if (enable_sync && std::atoi(enable_sync) != 0) + { + ::profiling::Context::get().setSync(); + } + + const int32_t num_threads = params_.Get<int32_t>("num_threads"); + + if (num_threads != -1) { + interpreter->SetNumThreads(num_threads); + } + + bool use_nnapi = params_.Get<bool>("use_nnapi"); + + interpreter->UseNNAPI(use_nnapi); + + if (use_nnapi) 
{ + if (nnfw_delegate_.BuildGraph(interpreter.get()) != kTfLiteOk) { + TFLITE_LOG(FATAL) << "Failed to BuildGraph!"; + } + } + +#ifdef TFLITE_FLEX + TFLITE_LOG(INFO) << "Instantiating Flex Delegate"; + delegate_ = FlexDelegate::Create(); + if (delegate_) { + interpreter->ModifyGraphWithDelegate(delegate_.get(), + /*allow_dynamic_tensors=*/true); + } +#endif // TFLITE_FLEX + + auto interpreter_inputs = interpreter->inputs(); + + if (!inputs.empty()) { + TFLITE_BENCHMARK_CHECK_EQ(inputs.size(), interpreter_inputs.size()) + << "Inputs mismatch: Model inputs #:" << interpreter_inputs.size() + << " expected: " << inputs.size(); + } + + // TFLITE_BENCHMARK_CHECK that all names and types match + for (int j = 0; j < inputs.size(); ++j) { + const InputLayerInfo& input = inputs[j]; + int i = interpreter_inputs[j]; + TfLiteTensor* t = interpreter->tensor(i); + TFLITE_BENCHMARK_CHECK_EQ(t->name, input.name) + << "Tensor # " << i << " is named " << t->name << " but flags call it " + << input.name; + } + + // Resize all non-string tensors. + for (int j = 0; j < inputs.size(); ++j) { + const InputLayerInfo& input = inputs[j]; + int i = interpreter_inputs[j]; + TfLiteTensor* t = interpreter->tensor(i); + if (t->type != kTfLiteString) { + interpreter->ResizeInputTensor(i, input.shape); + } + } + + if (interpreter->AllocateTensors() != kTfLiteOk) { + TFLITE_LOG(FATAL) << "Failed to allocate tensors!"; + } +} + +void BenchmarkTfLiteModel::RunImpl() { + bool use_nnapi = params_.Get<bool>("use_nnapi"); + if (use_nnapi) { + if (nnfw_delegate_.Invoke(interpreter.get()) != kTfLiteOk) { + TFLITE_LOG(FATAL) << "Failed to invoke!"; + } + } else { + if (interpreter->Invoke() != kTfLiteOk) { + TFLITE_LOG(FATAL) << "Failed to invoke!"; + } + } +} + +} // namespace benchmark +} // namespace tflite diff --git a/tests/tools/tflite_benchmark_model/profile_summarizer.cc b/tests/tools/tflite_benchmark_model/profile_summarizer.cc new file mode 100644 index 000000000..ce19b0c98 --- /dev/null +++ b/tests/tools/tflite_benchmark_model/profile_summarizer.cc @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/contrib/lite/profiling/profile_summarizer.h" + +#include <sstream> + +#include "tensorflow/contrib/lite/schema/schema_generated.h" + +namespace tflite { +namespace profiling { +namespace { + +struct OperatorDetails { + std::string name; + std::vector<std::string> inputs; + std::vector<std::string> outputs; +}; + +std::string GetTensorName(const tflite::Interpreter& interpreter, + int tensor_index) { + const auto tensor = interpreter.tensor(tensor_index); + if (tensor == nullptr || tensor->name == nullptr) { + return "Unknown"; + } + return tensor->name; +} +std::vector<std::string> GetTensorNames(const tflite::Interpreter& interpreter, + const TfLiteIntArray* tensor_indices) { + std::vector<std::string> tensors; + tensors.reserve(tensor_indices->size); + for (int i = 0; i < tensor_indices->size; i++) { + tensors.push_back(GetTensorName(interpreter, tensor_indices->data[i])); + } + return tensors; +} + +std::string ToString(const std::vector<std::string>& str_vector) { + std::stringstream stream; + stream << "["; + bool first = true; + for (const auto& s : str_vector) { + if (!first) { + stream << ", "; + } else { + first = false; + } + stream << s; + } + stream << "]"; + return stream.str(); +} + +OperatorDetails GetOperatorDetails(const tflite::Interpreter& interpreter, + int node_index) { + auto node_reg = interpreter.node_and_registration(node_index); + auto inputs = node_reg->first.inputs; + auto outputs = node_reg->first.outputs; + int code = node_reg->second.builtin_code; + const char* op_name = nullptr; + if (code == tflite::BuiltinOperator_CUSTOM) { + const char* custom_name = node_reg->second.custom_name; + op_name = custom_name ? custom_name : "UnknownCustomOp"; + } else { + op_name = tflite::EnumNamesBuiltinOperator()[code]; + } + const char* profiling_string = + interpreter.OpProfilingString(node_reg->second, &node_reg->first); + OperatorDetails details; + details.name = op_name; + if (profiling_string) { + details.name += ":" + std::string(profiling_string); + } + details.inputs = GetTensorNames(interpreter, inputs); + details.outputs = GetTensorNames(interpreter, outputs); + return details; +} + +tensorflow::StatSummarizerOptions GetProfileSummarizerOptions() { + auto options = tensorflow::StatSummarizerOptions(); + options.show_summary = true; + options.show_memory = false; + return options; +} + +} // namespace + +ProfileSummarizer::ProfileSummarizer() + : stats_calculator_( + new ::tensorflow::StatsCalculator(GetProfileSummarizerOptions())) {} + +void ProfileSummarizer::ProcessProfiles( + const std::vector<const ProfileEvent*>& profile_stats, + const tflite::Interpreter& interpreter) { + std::vector<const ProfileEvent*> events; + std::copy_if(profile_stats.begin(), profile_stats.end(), + std::back_inserter(events), [](const ProfileEvent* e) { + return e->event_type == + ProfileEvent::EventType::OPERATOR_INVOKE_EVENT && + e->end_timestamp_us >= e->begin_timestamp_us; + }); + // Sort with begin_time. 
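+  // Repeated executions of the same node are given an occurrence suffix
+  // (#1, #2, ...) below so that each invocation gets its own stats entry.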
+ std::sort(events.begin(), events.end(), + [](const ProfileEvent* const& a, const ProfileEvent* const& b) { + return a->begin_timestamp_us < b->begin_timestamp_us; + }); + if (events.empty()) { + return; + } + + int64_t base_start_us = events[0]->begin_timestamp_us; + int node_num = 0; + int64_t curr_total_us = 0; + int prev_op_idx = -1; + int child_op_no = 1; + for (auto event : events) { + auto op_details = GetOperatorDetails(interpreter, event->event_metadata); + bool from_same_op = (prev_op_idx == event->event_metadata); + child_op_no = from_same_op ? child_op_no + 1 : 1; + auto node_name = ToString(op_details.outputs) + "#" + std::to_string(child_op_no); + int64_t start_us = event->begin_timestamp_us - base_start_us; + int64_t node_exec_time = + event->end_timestamp_us - event->begin_timestamp_us; + stats_calculator_->AddNodeStats(node_name, op_details.name, node_num, + start_us, node_exec_time, 0 /*memory */); + curr_total_us += node_exec_time; + ++node_num; + prev_op_idx = event->event_metadata; + } + stats_calculator_->UpdateRunTotalUs(curr_total_us); +} +} // namespace profiling +} // namespace tflite diff --git a/tests/tools/tflite_benchmark_model/stats_calculator.cc b/tests/tools/tflite_benchmark_model/stats_calculator.cc new file mode 100644 index 000000000..578650701 --- /dev/null +++ b/tests/tools/tflite_benchmark_model/stats_calculator.cc @@ -0,0 +1,317 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/util/stats_calculator.h" + +#include <iomanip> +#include <map> +#include <queue> +#include <sstream> +#include <string> +#include <algorithm> + +namespace tensorflow { + +StatsCalculator::StatsCalculator(const StatSummarizerOptions& options) + : options_(options) {} + +std::string StatsCalculator::GetShortSummary() const { + std::stringstream stream; + stream << "Timings (microseconds): "; + run_total_us_.OutputToStream(&stream); + stream << std::endl; + + stream << "Memory (bytes): "; + memory_.OutputToStream(&stream); + stream << std::endl; + + stream << details_.size() << " nodes observed" << std::endl; + return stream.str(); +} + +std::ostream& InitField(std::ostream& stream, int width) { + stream << "\t" << std::right << std::setw(width) << std::fixed + << std::setprecision(3); + return stream; +} + +std::string StatsCalculator::HeaderString(const std::string& title) const { + std::stringstream stream; + + stream << "============================== " << title + << " ==============================" << std::endl; + + InitField(stream, 24) << "[node type]"; + InitField(stream, 9) << "[start]"; + InitField(stream, 9) << "[first]"; + InitField(stream, 9) << "[avg ms]"; + InitField(stream, 8) << "[%]"; + InitField(stream, 8) << "[cdf%]"; + InitField(stream, 10) << "[mem KB]"; + InitField(stream, 9) << "[times called]"; + stream << "\t" + << "[Name]"; + return stream.str(); +} + +std::string StatsCalculator::ColumnString(const Detail& detail, + const int64_t cumulative_stat_on_node, + const Stat<int64_t>& stat) const { + const double start_ms = detail.start_us.avg() / 1000.0; + const double first_time_ms = detail.rel_end_us.first() / 1000.0; + const double avg_time_ms = detail.rel_end_us.avg() / 1000.0; + const double percentage = detail.rel_end_us.sum() * 100.0 / stat.sum(); + const double cdf_percentage = (cumulative_stat_on_node * 100.0f) / stat.sum(); + const int64_t times_called = detail.times_called / num_runs(); + + std::stringstream stream; + InitField(stream, 24) << detail.type; + InitField(stream, 9) << start_ms; + InitField(stream, 9) << first_time_ms; + InitField(stream, 9) << avg_time_ms; + InitField(stream, 7) << percentage << "%"; + InitField(stream, 7) << cdf_percentage << "%"; + InitField(stream, 10) << detail.mem_used.newest() / 1000.0; + InitField(stream, 9) << times_called; + stream << "\t" << detail.name; + + return stream.str(); +} + +void StatsCalculator::OrderNodesByMetric( + SortingMetric metric, std::vector<const Detail*>* details) const { + std::priority_queue<std::pair<std::string, const Detail*>> sorted_list; + const int num_nodes = details_.size(); + + for (const auto& det : details_) { + const Detail* detail = &(det.second); + std::stringstream stream; + stream << std::setw(20) << std::right << std::setprecision(10) + << std::fixed; + + switch (metric) { + case BY_NAME: + stream << detail->name; + break; + case BY_RUN_ORDER: + stream << num_nodes - detail->run_order; + break; + case BY_TIME: + stream << detail->rel_end_us.avg(); + break; + case BY_MEMORY: + stream << detail->mem_used.avg(); + break; + case BY_TYPE: + stream << detail->type; + break; + default: + stream << ""; + break; + } + + sorted_list.emplace(stream.str(), detail); + } + + while (!sorted_list.empty()) { + auto entry = sorted_list.top(); + sorted_list.pop(); + details->push_back(entry.second); + } +} + +void StatsCalculator::ComputeStatsByType( + std::map<std::string, int64_t>* 
node_type_map_count, + std::map<std::string, int64_t>* node_type_map_time, + std::map<std::string, int64_t>* node_type_map_memory, + std::map<std::string, int64_t>* node_type_map_times_called, + int64_t* accumulated_us) const { + int64_t run_count = run_total_us_.count(); + + for (const auto& det : details_) { + const std::string node_name = det.first; + const Detail& detail = det.second; + + int64_t curr_time_val = + static_cast<int64_t>(detail.rel_end_us.sum() / run_count); + *accumulated_us += curr_time_val; + + int64_t curr_memory_val = detail.mem_used.newest(); + + const std::string& node_type = detail.type; + + const std::string sharp1("#1"); + bool first = std::mismatch(sharp1.rbegin(), sharp1.rend(), node_name.rbegin()).first == sharp1.rend(); + + if (first) { + (*node_type_map_count)[node_type] += 1; + (*node_type_map_times_called)[node_type] += detail.times_called / run_count; + } + (*node_type_map_time)[node_type] += curr_time_val; + (*node_type_map_memory)[node_type] += curr_memory_val; + } +} + +std::string StatsCalculator::GetStatsByNodeType() const { + std::stringstream stream; + + stream << "Number of nodes executed: " << details_.size() << std::endl; + + stream << "============================== Summary by node type " + "==============================" + << std::endl; + + std::map<std::string, int64_t> node_type_map_count; + std::map<std::string, int64_t> node_type_map_time; + std::map<std::string, int64_t> node_type_map_memory; + std::map<std::string, int64_t> node_type_map_times_called; + int64_t accumulated_us = 0; + + ComputeStatsByType(&node_type_map_count, &node_type_map_time, + &node_type_map_memory, &node_type_map_times_called, + &accumulated_us); + + // Sort them. + std::priority_queue<std::pair<int64_t, std::pair<std::string, int64_t>>> + timings; + for (const auto& node_type : node_type_map_time) { + const int64_t mem_used = node_type_map_memory[node_type.first]; + timings.emplace(node_type.second, + std::pair<std::string, int64_t>(node_type.first, mem_used)); + } + + InitField(stream, 24) << "[Node type]"; + InitField(stream, 9) << "[count]"; + InitField(stream, 10) << "[avg ms]"; + InitField(stream, 11) << "[avg %]"; + InitField(stream, 11) << "[cdf %]"; + InitField(stream, 10) << "[mem KB]"; + InitField(stream, 10) << "[times called]"; + stream << std::endl; + + float cdf = 0.0f; + while (!timings.empty()) { + auto entry = timings.top(); + timings.pop(); + + const std::string node_type = entry.second.first; + const float memory = entry.second.second / 1000.0f; + + const int64_t node_type_total_us = entry.first; + const float time_per_run_ms = node_type_total_us / 1000.0f; + + const float percentage = + ((entry.first / static_cast<float>(accumulated_us)) * 100.0f); + cdf += percentage; + + InitField(stream, 24) << node_type; + InitField(stream, 9) << node_type_map_count[node_type]; + InitField(stream, 10) << time_per_run_ms; + InitField(stream, 10) << percentage << "%"; + InitField(stream, 10) << cdf << "%"; + InitField(stream, 10) << memory; + InitField(stream, 9) << node_type_map_times_called[node_type]; + stream << std::endl; + } + stream << std::endl; + return stream.str(); +} + +std::string StatsCalculator::GetStatsByMetric(const std::string& title, + SortingMetric sorting_metric, + int num_stats) const { + std::vector<const Detail*> details; + OrderNodesByMetric(sorting_metric, &details); + + double cumulative_stat_on_node = 0; + + std::stringstream stream; + stream << HeaderString(title) << std::endl; + int stat_num = 0; + for (auto detail : details) { 
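+    // Emit at most num_stats rows; num_stats <= 0 means no limit.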
+    ++stat_num;
+    if (num_stats > 0 && stat_num > num_stats) {
+      break;
+    }
+
+    // TODO(andrewharp): Make this keep track of the particular metric for cdf.
+    cumulative_stat_on_node += detail->rel_end_us.sum();
+    stream << ColumnString(*detail, cumulative_stat_on_node, run_total_us_)
+           << std::endl;
+  }
+  stream << std::endl;
+  return stream.str();
+}
+
+std::string StatsCalculator::GetOutputString() const {
+  std::stringstream stream;
+  if (options_.show_run_order) {
+    stream << GetStatsByMetric("Run Order", BY_RUN_ORDER,
+                               options_.run_order_limit);
+  }
+  if (options_.show_time) {
+    stream << GetStatsByMetric("Top by Computation Time", BY_TIME,
+                               options_.time_limit);
+  }
+  if (options_.show_memory) {
+    stream << GetStatsByMetric("Top by Memory Use", BY_MEMORY,
+                               options_.memory_limit);
+  }
+  if (options_.show_type) {
+    stream << GetStatsByNodeType();
+  }
+  if (options_.show_summary) {
+    stream << GetShortSummary() << std::endl;
+  }
+  return stream.str();
+}
+
+void StatsCalculator::AddNodeStats(const std::string& name,
+                                   const std::string& type, int64_t run_order,
+                                   int64_t start_us, int64_t rel_end_us,
+                                   int64_t mem_used) {
+  Detail* detail = nullptr;
+  if (details_.find(name) == details_.end()) {
+    details_.insert({name, {}});
+    detail = &details_.at(name);
+    detail->type = type;
+    detail->name = name;
+    detail->run_order = run_order;
+  } else {
+    detail = &details_.at(name);
+  }
+  detail->start_us.UpdateStat(start_us);
+  detail->rel_end_us.UpdateStat(rel_end_us);
+  detail->mem_used.UpdateStat(mem_used);
+  detail->times_called++;
+}
+
+}  // namespace tensorflow
diff --git a/tests/tools/tflite_run/CMakeLists.txt b/tests/tools/tflite_run/CMakeLists.txt
new file mode 100644
index 000000000..49d87318f
--- /dev/null
+++ b/tests/tools/tflite_run/CMakeLists.txt
@@ -0,0 +1,26 @@
+list(APPEND TFLITE_RUN_SRCS "src/tflite_run.cc")
+list(APPEND TFLITE_RUN_SRCS "src/bin_image.cc")
+list(APPEND TFLITE_RUN_SRCS "src/args.cc")
+list(APPEND TFLITE_RUN_SRCS "src/tensor_dumper.cc")
+list(APPEND TFLITE_RUN_SRCS "src/tensor_loader.cc")
+
+add_executable(tflite_run ${TFLITE_RUN_SRCS})
+target_include_directories(tflite_run PRIVATE src)
+target_link_libraries(tflite_run tensorflow-lite ${LIB_PTHREAD} dl nnfw_lib_tflite)
+target_link_libraries(tflite_run boost_program_options boost_system boost_filesystem)
+
+install(TARGETS tflite_run DESTINATION bin)
+
+# TEST BUILD
+nnfw_find_package(GTest)
+
+if(NOT GTest_FOUND)
+  return()
+endif(NOT GTest_FOUND)
+
+## Add test cpp file
+add_executable(tflite_test src/tflite_test.cc)
+## Link test executable against gtest & gtest_main
+target_link_libraries(tflite_test gtest gtest_main ${LIB_PTHREAD})
+## install test binary for packaging
+install(TARGETS tflite_test DESTINATION unittest)
diff --git a/tests/tools/tflite_run/README.md b/tests/tools/tflite_run/README.md
new file mode 100644
index 000000000..35d2b6497
--- /dev/null
+++ b/tests/tools/tflite_run/README.md
@@ -0,0 +1,91 @@
+# tflite_run
+
+A simple TensorFlow Lite runner. It measures the elapsed time and can
+optionally dump the input/output tensors or verify them.
+
+## Usage
+
+### Simple run
+
+This runs the model with random input data:
+
+```
+$ ./tflite_run model.tflite
+```
+
+Output would look like:
+
+```
+input tensor indices = [0,]
+Input image size is smaller than the size required by the model. Input will not be set.
+output tensor indices = [308(max:984),]
+Prepare takes 0.00126718 seconds
+Invoke takes 7.09527 seconds
+```
+
+### Specifying input feature map
+
+An input feature map can be specified, but only as preprocessed binary data,
+which means that image files must be converted beforehand.
+
+TODO: Add input image preprocessing instructions
+
+```
+$ ./tflite_run model.tflite -i binary_input_file
+```
+
+### Dump the input and output tensors
+
+Dump the input and output tensors to a file:
+```
+$ ./tflite_run model.tflite --dump golden
+```
+
+This is usually done for later verification. Here the tensors are written to a
+file named "golden".
+
+### Compare with the saved outputs
+
+The results from `tflite_run` are compared against a saved binary file with the
+`--compare` option.
+
+```
+$ ls golden
+golden
+$ ./tflite_run model.tflite --compare golden
+```
+
+The output would look like:
+
+```
+input tensor indices = [0,]
+Input image size is smaller than the size required by the model. Input will not be set.
+output tensor indices = [308(max:984),]
+Prepare takes 0.00126718 seconds
+Invoke takes 7.09527 seconds
+========================================
+Comparing the results with "golden".
+========================================
+  Tensor #308: UNMATCHED
+    1 diffs are detected
+    Max absolute diff at [0, 0]
+       expected: 99
+       obtained: 0.000139008
+       absolute diff: 98.9999
+    Max relative diff at [0, 1007]
+       expected: 7.01825e-33
+       obtained: 0.000139011
+       relative diff: 1
+         (tolerance level = 8.38861e+06)
+```
+
+If the `--compare` option is given, the exit code depends on the comparison
+result: 0 if the outputs match, non-zero otherwise.
+
+## How Verification Works
+
+For verification, we may follow these steps:
+
+1. Generate and store the verification data (run with option `--dump`)
+   1. Input tensors do not matter as we will keep inputs along with outputs
+   1. Interpreter.Invoke()
+   1. Dump input tensors and output tensors to a file
+1. Feed the dumped file to the other runtime that we want to verify (run with option `--compare`)
+   1. Set the interpreter's inputs to the input tensor data from the file
+   1. Interpreter.Invoke()
+   1. Compare the results with the output tensor data from the file
diff --git a/tests/tools/tflite_run/src/args.cc b/tests/tools/tflite_run/src/args.cc
new file mode 100644
index 000000000..713a0a9d2
--- /dev/null
+++ b/tests/tools/tflite_run/src/args.cc
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "args.h" + +#include <iostream> + +#include <boost/filesystem.hpp> + +namespace TFLiteRun +{ + +Args::Args(const int argc, char **argv) +{ + Initialize(); + Parse(argc, argv); +} + +void Args::Initialize(void) +{ + + // General options + po::options_description general("General options"); + + // clang-format off + general.add_options() + ("help,h", "Display available options") + ("dump,d", po::value<std::string>()->default_value(""), "Output filename") + ("compare,c", po::value<std::string>()->default_value(""), "filename to be compared with") + ("tflite", po::value<std::string>()->required()); + // clang-format on + + _options.add(general); + _positional.add("tflite", 1); +} + +void Args::Parse(const int argc, char **argv) +{ + po::variables_map vm; + po::store(po::command_line_parser(argc, argv).options(_options).positional(_positional).run(), + vm); + po::notify(vm); + +#if 0 // Enable this when we have mutually conflicting options + { + auto conflicting_options = [&](const std::string &o1, const std::string &o2) { + if ((vm.count(o1) && !vm[o1].defaulted()) && (vm.count(o2) && !vm[o2].defaulted())) + { + throw boost::program_options::error(std::string("Two options '") + o1 + "' and '" + o2 + + "' cannot be given at once."); + } + }; + + conflicting_options("input", "compare"); + } +#endif + + if (vm.count("help")) + { + std::cout << "tflite_run\n\n"; + std::cout << "Usage: " << argv[0] << " <.tflite> [<options>]\n\n"; + std::cout << _options; + std::cout << "\n"; + + exit(0); + } + + if (vm.count("dump")) + { + _dump_filename = vm["dump"].as<std::string>(); + } + + if (vm.count("compare")) + { + _compare_filename = vm["compare"].as<std::string>(); + } + + if (vm.count("tflite")) + { + _tflite_filename = vm["tflite"].as<std::string>(); + + if (_tflite_filename.empty()) + { + // TODO Print usage instead of the below message + std::cerr << "Please specify tflite file. Run with `--help` for usage." + << "\n"; + + exit(1); + } + else + { + if (!boost::filesystem::exists(_tflite_filename)) + { + std::cerr << "tflite file not found: " << _tflite_filename << "\n"; + } + } + } +} + +} // end of namespace TFLiteRun diff --git a/tests/tools/tflite_run/src/args.h b/tests/tools/tflite_run/src/args.h new file mode 100644 index 000000000..5561544eb --- /dev/null +++ b/tests/tools/tflite_run/src/args.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
diff --git a/tests/tools/tflite_run/src/args.cc b/tests/tools/tflite_run/src/args.cc
new file mode 100644
index 000000000..713a0a9d2
--- /dev/null
+++ b/tests/tools/tflite_run/src/args.cc
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "args.h"
+
+#include <cstdlib>
+#include <iostream>
+
+#include <boost/filesystem.hpp>
+
+namespace TFLiteRun
+{
+
+Args::Args(const int argc, char **argv)
+{
+  Initialize();
+  Parse(argc, argv);
+}
+
+void Args::Initialize(void)
+{
+  // General options
+  po::options_description general("General options");
+
+  // clang-format off
+  general.add_options()
+    ("help,h", "Display available options")
+    ("dump,d", po::value<std::string>()->default_value(""), "Output filename")
+    ("compare,c", po::value<std::string>()->default_value(""), "Filename to be compared with")
+    ("tflite", po::value<std::string>()->required());
+  // clang-format on
+
+  _options.add(general);
+  _positional.add("tflite", 1);
+}
+
+void Args::Parse(const int argc, char **argv)
+{
+  po::variables_map vm;
+  po::store(po::command_line_parser(argc, argv).options(_options).positional(_positional).run(),
+            vm);
+
+  // Handle `--help` before po::notify() so that help is printed even when
+  // the required `tflite` argument is missing
+  if (vm.count("help"))
+  {
+    std::cout << "tflite_run\n\n";
+    std::cout << "Usage: " << argv[0] << " <.tflite> [<options>]\n\n";
+    std::cout << _options;
+    std::cout << "\n";
+
+    exit(0);
+  }
+
+  po::notify(vm);
+
+#if 0 // Enable this when we have mutually conflicting options
+  {
+    auto conflicting_options = [&](const std::string &o1, const std::string &o2) {
+      if ((vm.count(o1) && !vm[o1].defaulted()) && (vm.count(o2) && !vm[o2].defaulted()))
+      {
+        throw boost::program_options::error(std::string("Two options '") + o1 + "' and '" + o2 +
+                                            "' cannot be given at once.");
+      }
+    };
+
+    conflicting_options("input", "compare");
+  }
+#endif
+
+  if (vm.count("dump"))
+  {
+    _dump_filename = vm["dump"].as<std::string>();
+  }
+
+  if (vm.count("compare"))
+  {
+    _compare_filename = vm["compare"].as<std::string>();
+  }
+
+  if (vm.count("tflite"))
+  {
+    _tflite_filename = vm["tflite"].as<std::string>();
+
+    if (_tflite_filename.empty())
+    {
+      // TODO Print usage instead of the below message
+      std::cerr << "Please specify tflite file. Run with `--help` for usage."
+                << "\n";
+
+      exit(1);
+    }
+    else
+    {
+      if (!boost::filesystem::exists(_tflite_filename))
+      {
+        std::cerr << "tflite file not found: " << _tflite_filename << "\n";
+        // The model file is required to proceed, so exit here as well
+        exit(1);
+      }
+    }
+  }
+}
+
+} // end of namespace TFLiteRun
diff --git a/tests/tools/tflite_run/src/args.h b/tests/tools/tflite_run/src/args.h
new file mode 100644
index 000000000..5561544eb
--- /dev/null
+++ b/tests/tools/tflite_run/src/args.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_RUN_ARGS_H__
+#define __TFLITE_RUN_ARGS_H__
+
+#include <string>
+#include <boost/program_options.hpp>
+
+namespace po = boost::program_options;
+
+namespace TFLiteRun
+{
+
+class Args
+{
+public:
+  Args(const int argc, char **argv);
+  void print(void);
+
+  const std::string &getTFLiteFilename(void) const { return _tflite_filename; }
+  const std::string &getDumpFilename(void) const { return _dump_filename; }
+  const std::string &getCompareFilename(void) const { return _compare_filename; }
+
+private:
+  void Initialize();
+  void Parse(const int argc, char **argv);
+
+private:
+  po::positional_options_description _positional;
+  po::options_description _options;
+
+  std::string _tflite_filename;
+  std::string _dump_filename;
+  std::string _compare_filename;
+};
+
+} // end of namespace TFLiteRun
+
+#endif // __TFLITE_RUN_ARGS_H__
diff --git a/tests/tools/tflite_run/src/bin_image.cc b/tests/tools/tflite_run/src/bin_image.cc
new file mode 100644
index 000000000..16d4c94f7
--- /dev/null
+++ b/tests/tools/tflite_run/src/bin_image.cc
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+#include <fstream>
+
+#include "bin_image.h"
+
+BinImage::BinImage(unsigned int width, unsigned int height, unsigned int channels)
+    : _width(width), _height(height), _channels(channels)
+{
+}
+
+BinImage::~BinImage() {}
+
+void BinImage::loadImage(const std::string &filename)
+{
+  std::ifstream fin(filename);
+
+  if (!fin)
+  {
+    std::cerr << "Cannot open image file: " << filename << ". "
+              << "Input image will not be set." << std::endl;
+    return;
+  }
+
+  _image.reserve(_width * _height * _channels);
+
+  // Assumption: the binary image is stored in [H, W, C] order
+  for (unsigned int i = 0; i < _width * _height * _channels; ++i)
+    _image.push_back(fin.get());
+}
+
+void BinImage::AssignTensor(TfLiteTensor *t)
+{
+  float *p = t->data.f;
+  const int IMAGE_MEAN = 128;
+  const float IMAGE_STD = 128.0f;
+
+  // to prevent runtime exception
+  if (_image.size() < _width * _height * _channels)
+  {
+    std::cerr << "Input image size is smaller than the size required by the model."
+              << " Input will not be set." << std::endl;
+    return;
+  }
+
+  // Iterate in [H, W, C] order so the sequential writes match the order the
+  // image was loaded in (the previous loops iterated width-first, which
+  // transposed the spatial dimensions)
+  for (unsigned int y = 0; y < _height; ++y)
+  {
+    for (unsigned int x = 0; x < _width; ++x)
+    {
+      for (unsigned int c = 0; c < _channels; ++c)
+      {
+        *p++ = (_image[y * _width * _channels + x * _channels + c] - IMAGE_MEAN) / IMAGE_STD;
+      }
+    }
+  }
+}
diff --git a/tests/tools/tflite_run/src/bin_image.h b/tests/tools/tflite_run/src/bin_image.h
new file mode 100644
index 000000000..845011be6
--- /dev/null
+++ b/tests/tools/tflite_run/src/bin_image.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_RUN_BIN_IMAGE_H__
+#define __TFLITE_RUN_BIN_IMAGE_H__
+
+#include <string>
+#include <vector>
+
+#include "tensorflow/contrib/lite/context.h"
+
+class BinImage
+{
+public:
+  BinImage(unsigned int width, unsigned int height, unsigned int channels);
+  ~BinImage();
+
+  void loadImage(const std::string &filename);
+
+  void AssignTensor(TfLiteTensor *t);
+
+private:
+  unsigned int _width;
+  unsigned int _height;
+  unsigned int _channels;
+
+  std::vector<unsigned char> _image;
+};
+
+#endif // __TFLITE_RUN_BIN_IMAGE_H__
diff --git a/tests/tools/tflite_run/src/tensor_dumper.cc b/tests/tools/tflite_run/src/tensor_dumper.cc
new file mode 100644
index 000000000..8568c9b67
--- /dev/null
+++ b/tests/tools/tflite_run/src/tensor_dumper.cc
@@ -0,0 +1,54 @@
+#include "tensor_dumper.h"
+
+#include <fstream>
+#include <iostream>
+#include <cstring>
+
+#include "tensorflow/contrib/lite/interpreter.h"
+
+namespace TFLiteRun
+{
+
+TensorDumper::TensorDumper()
+{
+  // DO NOTHING
+}
+
+void TensorDumper::addTensors(tflite::Interpreter &interpreter, const std::vector<int> &indices)
+{
+  for (const auto &o : indices)
+  {
+    // Snapshot the tensor's current contents
+    const TfLiteTensor *tensor = interpreter.tensor(o);
+    size_t size = tensor->bytes;
+    std::vector<char> buffer;
+    buffer.resize(size);
+    memcpy(buffer.data(), tensor->data.raw, size);
+    _tensors.emplace_back(o, std::move(buffer));
+  }
+}
+
+void TensorDumper::dump(const std::string &filename) const
+{
+  // File layout: a uint32_t tensor count, the tensor indices as raw ints,
+  // then the raw bytes of each tensor back to back
+  // TODO Handle file open/write error
+  std::ofstream file(filename, std::ios::out | std::ios::binary);
+
+  // Write number of tensors
+  uint32_t num_tensors = static_cast<uint32_t>(_tensors.size());
+  file.write(reinterpret_cast<const char *>(&num_tensors), sizeof(num_tensors));
+
+  // Write tensor indices
+  for (const auto &t : _tensors)
+  {
+    file.write(reinterpret_cast<const char *>(&t._index), sizeof(int));
+  }
+
+  // Write data
+  for (const auto &t : _tensors)
+  {
+    file.write(t._data.data(), t._data.size());
+  }
+
+  file.close();
+}
+
+} // end of namespace TFLiteRun
diff --git a/tests/tools/tflite_run/src/tensor_dumper.h b/tests/tools/tflite_run/src/tensor_dumper.h
new file mode 100644
index 000000000..2805f1076
--- /dev/null
+++ b/tests/tools/tflite_run/src/tensor_dumper.h
@@ -0,0 +1,38 @@
+#ifndef __TFLITE_RUN_TENSOR_DUMPER_H__
+#define __TFLITE_RUN_TENSOR_DUMPER_H__
+
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace tflite
+{
+class Interpreter;
+}
+
+namespace TFLiteRun
+{
+
+class TensorDumper
+{
+private:
+  struct Tensor
+  {
+    int _index;
+    std::vector<char> _data;
+
+    Tensor(int index, std::vector<char> &&data) : _index(index), _data(std::move(data)) {}
+  };
+
+public:
+  TensorDumper();
+  void addTensors(tflite::Interpreter &interpreter, const std::vector<int> &indices);
+  void dump(const std::string &filename) const;
+
+private:
+  std::vector<Tensor> _tensors;
+};
+
+} // end of namespace TFLiteRun
+
+#endif // __TFLITE_RUN_TENSOR_DUMPER_H__
diff --git a/tests/tools/tflite_run/src/tensor_loader.cc b/tests/tools/tflite_run/src/tensor_loader.cc
new file mode 100644
index 000000000..934b78f40
--- /dev/null
+++ b/tests/tools/tflite_run/src/tensor_loader.cc
@@ -0,0 +1,67 @@
+#include "tensor_loader.h"
+
+#include <cassert>
+
+#include <fstream>
+#include <vector>
+
+#include "misc/tensor/Shape.h"
+
+namespace TFLiteRun
+{
+
+TensorLoader::TensorLoader(tflite::Interpreter &interpreter)
+    : _interpreter(interpreter), _raw_data(nullptr)
+{
+}
+
+void TensorLoader::load(const std::string &filename)
+{
+  // File layout: a uint32_t tensor count, the tensor indices as raw ints,
+  // then the raw bytes of each tensor back to back (see TensorDumper::dump)
+  // TODO Handle file open/read error
+  std::ifstream file(filename, std::ios::ate | std::ios::binary);
+  size_t file_size = file.tellg();
+  file.seekg(0, std::ios::beg);
+
+  uint32_t num_tensors = 0;
+  file.read(reinterpret_cast<char *>(&num_tensors), sizeof(num_tensors));
+
+  // Use std::vector instead of a variable-length array, which is not standard C++
+  std::vector<int> tensor_indices(num_tensors);
+  file.read(reinterpret_cast<char *>(tensor_indices.data()), num_tensors * sizeof(int));
+
+  const size_t data_size = file_size - sizeof(num_tensors) - num_tensors * sizeof(int);
+
+  // unique_ptr<float[]> (not unique_ptr<float>) so that delete[] is used
+  _raw_data = std::unique_ptr<float[]>(new float[data_size / sizeof(float)]);
+  file.read(reinterpret_cast<char *>(_raw_data.get()), data_size);
+
+  size_t offset = 0;
+  for (const auto &o : tensor_indices)
+  {
+    const TfLiteTensor *tensor = _interpreter.tensor(o);
+
+    // Convert tensor shape to `Shape` from `tensor->dims`
+    nnfw::misc::tensor::Shape shape(static_cast<size_t>(tensor->dims->size));
+    for (int d = 0; d < tensor->dims->size; d++)
+    {
+      shape.dim(d) = tensor->dims->data[d];
+    }
+
+    float *base = _raw_data.get() + offset;
+
+    assert(tensor->bytes % sizeof(float) == 0);
+    offset += (tensor->bytes / sizeof(float));
+
+    _tensor_map.insert(std::make_pair(o, nnfw::tflite::TensorView<float>(shape, base)));
+  }
+
+  // The file size and the total tensor size must match
+  assert(file_size == sizeof(num_tensors) + num_tensors * sizeof(int) + offset * sizeof(float));
+
+  file.close();
+}
+
+const nnfw::tflite::TensorView<float> &TensorLoader::get(int tensor_idx) const
+{
+  auto found = _tensor_map.find(tensor_idx);
+  assert(found != _tensor_map.end());
+  return found->second;
+}
+
+} // end of namespace TFLiteRun
diff --git a/tests/tools/tflite_run/src/tensor_loader.h b/tests/tools/tflite_run/src/tensor_loader.h
new file mode 100644
index 000000000..fc4a37a08
--- /dev/null
+++ b/tests/tools/tflite_run/src/tensor_loader.h
@@ -0,0 +1,35 @@
+#ifndef __TFLITE_RUN_TENSOR_LOADER_H__
+#define __TFLITE_RUN_TENSOR_LOADER_H__
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include "tflite/TensorView.h"
+
+namespace tflite
+{
+class Interpreter;
+}
+
+namespace TFLiteRun
+{
+
+class TensorLoader
+{
+public:
+  TensorLoader(tflite::Interpreter &interpreter);
+  void load(const std::string &filename);
+  const nnfw::tflite::TensorView<float> &get(int tensor_idx) const;
+  size_t getNums() const { return _tensor_map.size(); }
+
+private:
+  tflite::Interpreter &_interpreter;
+  std::unique_ptr<float[]> _raw_data;
+  std::unordered_map<int, nnfw::tflite::TensorView<float>> _tensor_map;
+};
+
+} // end of namespace TFLiteRun
+
+#endif // __TFLITE_RUN_TENSOR_LOADER_H__
diff --git a/tests/tools/tflite_run/src/tflite_run.cc b/tests/tools/tflite_run/src/tflite_run.cc
new file mode 100644
index 000000000..5be6909e5
--- /dev/null
+++ b/tests/tools/tflite_run/src/tflite_run.cc
@@ -0,0 +1,262 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tflite/ext/kernels/register.h"
+#include "tensorflow/contrib/lite/model.h"
+
+#include "bin_image.h"
+#include "args.h"
+#include "tensor_dumper.h"
+#include "tensor_loader.h"
+#include "misc/benchmark.h"
+#include "misc/environment.h"
+#include "misc/fp32.h"
+#include "tflite/Diff.h"
+#include "tflite/Assert.h"
+#include "tflite/Session.h"
+#include "tflite/InterpreterSession.h"
+#include "tflite/NNAPISession.h"
+#include "misc/tensor/IndexIterator.h"
+#include "misc/tensor/Object.h"
+
+#include <cassert>
+#include <cstring>
+#include <iostream>
+#include <chrono>
+#include <algorithm>
+
+using namespace tflite;
+using namespace nnfw::tflite;
+using namespace std::placeholders; // for _1, _2 ...
+
+void print_max_idx(float *f, int size)
+{
+  float *p = std::max_element(f, f + size);
+  std::cout << "max:" << p - f;
+}
+
+int main(const int argc, char **argv)
+{
+  bool use_nnapi = false;
+
+  if (std::getenv("USE_NNAPI") != nullptr)
+  {
+    use_nnapi = true;
+  }
+
+  StderrReporter error_reporter;
+
+  TFLiteRun::Args args(argc, argv);
+
+  auto model = FlatBufferModel::BuildFromFile(args.getTFLiteFilename().c_str(), &error_reporter);
+  if (model == nullptr)
+  {
+    std::cerr << "Cannot create model" << std::endl;
+    return 1;
+  }
+
+  std::unique_ptr<Interpreter> interpreter;
+
+  std::chrono::milliseconds t_prepare(0);
+  std::chrono::milliseconds t_invoke(0);
+
+  nnfw::misc::benchmark::measure(t_prepare) << [&](void) {
+    BuiltinOpResolver resolver;
+
+    InterpreterBuilder builder(*model, resolver);
+
+    TFLITE_ENSURE(builder(&interpreter));
+
+    interpreter->SetNumThreads(1);
+  };
+
+  std::shared_ptr<nnfw::tflite::Session> sess;
+
+  if (use_nnapi)
+  {
+    sess = std::make_shared<nnfw::tflite::NNAPISession>(interpreter.get());
+  }
+  else
+  {
+    sess = std::make_shared<nnfw::tflite::InterpreterSession>(interpreter.get());
+  }
+
+  sess->prepare();
+
+  TFLiteRun::TensorLoader tensor_loader(*interpreter);
+
+  // Load input from dumped tensor file.
+  if (!args.getCompareFilename().empty())
+  {
+    tensor_loader.load(args.getCompareFilename());
+
+    for (const auto &o : interpreter->inputs())
+    {
+      const auto &tensor_view = tensor_loader.get(o);
+      TfLiteTensor *tensor = interpreter->tensor(o);
+
+      memcpy(reinterpret_cast<void *>(tensor->data.f),
+             reinterpret_cast<const void *>(tensor_view._base), tensor->bytes);
+    }
+  }
+  else
+  {
+    const int seed = 1; /* TODO Add an option for seed value */
+    RandomGenerator randgen{seed, 0.0f, 2.0f};
+
+    // No input specified, so fill the input tensors with random values.
+    for (const auto &o : interpreter->inputs())
+    {
+      TfLiteTensor *tensor = interpreter->tensor(o);
+      if (tensor->type == kTfLiteInt32)
+      {
+        // Generate signed 32-bit integer (s32) input
+        auto tensor_view = nnfw::tflite::TensorView<int32_t>::make(*interpreter, o);
+
+        int32_t value = 0;
+
+        nnfw::misc::tensor::iterate(tensor_view.shape())
+            << [&](const nnfw::misc::tensor::Index &ind) {
+                 // TODO Generate random values
+                 // Gather operation: index should be within input coverage.
+                 tensor_view.at(ind) = value;
+                 value++;
+               };
+      }
+      else if (tensor->type == kTfLiteUInt8)
+      {
+        // Generate unsigned 8-bit integer input
+        auto tensor_view = nnfw::tflite::TensorView<uint8_t>::make(*interpreter, o);
+
+        uint8_t value = 0;
+
+        nnfw::misc::tensor::iterate(tensor_view.shape())
+            << [&](const nnfw::misc::tensor::Index &ind) {
+                 // TODO Generate random values
+                 tensor_view.at(ind) = value;
+                 value = (value + 1) & 0xFF;
+               };
+      }
+      else if (tensor->type == kTfLiteBool)
+      {
+        // Generate bool input
+        auto tensor_view = nnfw::tflite::TensorView<bool>::make(*interpreter, o);
+
+        // Select the generate<bool> overload explicitly and bind it to the
+        // random generator, so it can serve as the Object initializer
+        auto fp = static_cast<bool (RandomGenerator::*)(const ::nnfw::misc::tensor::Shape &,
+                                                        const ::nnfw::misc::tensor::Index &)>(
+            &RandomGenerator::generate<bool>);
+        const nnfw::misc::tensor::Object<bool> data(tensor_view.shape(),
+                                                    std::bind(fp, randgen, _1, _2));
+
+        nnfw::misc::tensor::iterate(tensor_view.shape())
+            << [&](const nnfw::misc::tensor::Index &ind) {
+                 const auto value = data.at(ind);
+                 tensor_view.at(ind) = value;
+               };
+      }
+      else
+      {
+        assert(tensor->type == kTfLiteFloat32);
+
+        const float *end = reinterpret_cast<const float *>(tensor->data.raw_const + tensor->bytes);
+        for (float *ptr = tensor->data.f; ptr < end; ptr++)
+        {
+          *ptr = randgen.generate<float>();
+        }
+      }
+    }
+  }
+
+  TFLiteRun::TensorDumper tensor_dumper;
+  // Must be called before `interpreter->Invoke()`
+  tensor_dumper.addTensors(*interpreter, interpreter->inputs());
+
+  std::cout << "input tensor indices = [";
+  for (const auto &o : interpreter->inputs())
+  {
+    std::cout << o << ",";
+  }
+  std::cout << "]" << std::endl;
+
+  nnfw::misc::benchmark::measure(t_invoke) << [&sess](void) {
+    if (!sess->run())
+    {
+      assert(0 && "run failed!");
+    }
+  };
+
+  sess->teardown();
+
+  // Must be called after `interpreter->Invoke()`
+  tensor_dumper.addTensors(*interpreter, interpreter->outputs());
+
+  std::cout << "output tensor indices = [";
+  for (const auto &o : interpreter->outputs())
+  {
+    std::cout << o << "(";
+
+    print_max_idx(interpreter->tensor(o)->data.f, interpreter->tensor(o)->bytes / sizeof(float));
+
+    std::cout << "),";
+  }
+  std::cout << "]" << std::endl;
+
+  std::cout << "Prepare takes " << t_prepare.count() / 1000.0 << " seconds" << std::endl;
+  std::cout << "Invoke takes " << t_invoke.count() / 1000.0 << " seconds" << std::endl;
+
+  if (!args.getDumpFilename().empty())
+  {
+    const std::string &dump_filename = args.getDumpFilename();
+    tensor_dumper.dump(dump_filename);
+    std::cout << "Input/output tensors have been dumped to file \"" << dump_filename << "\"."
+              << std::endl;
+  }
+
+  if (!args.getCompareFilename().empty())
+  {
+    const std::string &compare_filename = args.getCompareFilename();
+    std::cout << "========================================" << std::endl;
+    std::cout << "Comparing the results with \"" << compare_filename << "\"." << std::endl;
+    std::cout << "========================================" << std::endl;
+
+    // TODO Code duplication (copied from RandomTestRunner)
+
+    int tolerance = 1;
+    nnfw::misc::env::IntAccessor("TOLERANCE").access(tolerance);
+
+    auto equals = [tolerance](float lhs, float rhs) {
+      // NOTE Hybrid approach
+      // TODO Allow users to set tolerance for absolute_epsilon_equal
+      if (nnfw::misc::fp32::absolute_epsilon_equal(lhs, rhs))
+      {
+        return true;
+      }
+
+      return nnfw::misc::fp32::epsilon_equal(lhs, rhs, tolerance);
+    };
+
+    nnfw::misc::tensor::Comparator comparator(equals);
+    TfLiteInterpMatchApp app(comparator);
+    bool res = true;
+
+    for (const auto &o : interpreter->outputs())
+    {
+      const auto &expected = tensor_loader.get(o);
+      auto obtained = nnfw::tflite::TensorView<float>::make(*interpreter, o);
+
+      res = res && app.compareSingleTensorView(expected, obtained, o);
+    }
+
+    if (!res)
+    {
+      return 255;
+    }
+  }
+
+  return 0;
+}
diff --git a/tests/tools/tflite_run/src/tflite_test.cc b/tests/tools/tflite_run/src/tflite_test.cc
new file mode 100644
index 000000000..d0d36c229
--- /dev/null
+++ b/tests/tools/tflite_run/src/tflite_test.cc
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gtest/gtest.h"
+
+TEST(TFLite_test_case, simple_test) { EXPECT_EQ(1, 1); }