author     Alexey Suhov <alexey.suhov@intel.com>    2019-10-02 17:30:49 +0300
committer  Alexey Suhov <alexey.suhov@intel.com>    2019-10-02 17:30:49 +0300
commit     2c83de45b9c148c94f582861198d5dfe40b4e65e (patch)
tree       85aa192e301c183520d6c233bc58caf02e247485 /tools/benchmark
parent     c37d4661a27afb408a45f7752acea968032afcc0 (diff)
download   dldt-2c83de45b9c148c94f582861198d5dfe40b4e65e.tar.gz
           dldt-2c83de45b9c148c94f582861198d5dfe40b4e65e.tar.bz2
           dldt-2c83de45b9c148c94f582861198d5dfe40b4e65e.zip
publish master branch
Diffstat (limited to 'tools/benchmark')
-rw-r--r--  tools/benchmark/README.md                    | 157
-rw-r--r--  tools/benchmark/__init__.py                  |   0
-rw-r--r--  tools/benchmark/benchmark.py                 | 189
-rw-r--r--  tools/benchmark/requirements.txt             |   4
-rw-r--r--  tools/benchmark/utils/__init__.py            |  15
-rw-r--r--  tools/benchmark/utils/constants.py           |  53
-rw-r--r--  tools/benchmark/utils/infer_request_wrap.py  |  82
-rw-r--r--  tools/benchmark/utils/inputs_filling.py      | 189
-rw-r--r--  tools/benchmark/utils/logging.py             |  21
-rw-r--r--  tools/benchmark/utils/progress_bar.py        |  65
-rw-r--r--  tools/benchmark/utils/statistics_report.py   | 119
-rw-r--r--  tools/benchmark/utils/utils.py               | 248
12 files changed, 1142 insertions, 0 deletions
diff --git a/tools/benchmark/README.md b/tools/benchmark/README.md
new file mode 100644
index 000000000..fb427423d
--- /dev/null
+++ b/tools/benchmark/README.md
@@ -0,0 +1,157 @@
+# Benchmark Python* Application
+
+This topic demonstrates how to run the Benchmark Application demo, which performs inference using convolutional networks.
+
+## How It Works
+
+Upon start-up, the application reads command-line parameters and loads a network and images/binary files to the Inference Engine
+plugin, which is chosen depending on the specified device. The number of infer requests and the execution approach depend
+on the mode defined with the `-api` command-line parameter.
+
+> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with the `--reverse_input_channels` argument specified. For more information about the argument, refer to the **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
+
+### Synchronous API
+
+For synchronous mode, the primary metric is latency. The application creates one infer request and executes the `Infer` method. The number of executions is defined by one of the following:
+* Number of iterations defined with the `-niter` command-line argument
+* Time duration specified with the `-t` command-line argument
+* Both of them (execution continues until both conditions are met)
+* Predefined duration if neither `-niter` nor `-t` is specified. The predefined duration value depends on the device.
+
+During the execution, the application collects two types of metrics:
+* Latency for each infer request executed with the `Infer` method
+* Duration of all executions
+
+The reported latency value is calculated as the median of all collected latencies. The reported throughput value is derived from the reported latency and additionally depends on the batch size.
+
+### Asynchronous API
+For asynchronous mode, the primary metric is throughput in frames per second (FPS). The application creates a certain number of infer requests and executes the `StartAsync` method. The number of executions is defined by one of the following:
+* Number of iterations defined with the `-niter` command-line argument
+* Time duration specified with the `-t` command-line argument
+* Both of them (execution continues until both conditions are met)
+* Predefined duration if neither `-niter` nor `-t` is specified. The predefined duration value depends on the device.
+
+The infer requests are executed asynchronously. A callback is used to wait for a previous execution to complete. The application measures all infer request executions and reports the throughput metric based on the batch size and the total execution duration.
+
+## Running
+Note that benchmark_app usually produces optimal performance for any device out of the box.
+
+**So in most cases you don't need to tune the app options explicitly, and the plain device name is enough**, e.g.:
+```
+$benchmark_app -m <model> -i <input> -d CPU
+```
+
+However, the result may still be non-optimal in some cases, especially for very small networks. More details can be found in [Introduction to Performance Topics](./docs/IE_DG/Intro_to_Performance.md).
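The latency and throughput described in the Synchronous/Asynchronous API sections above are derived from the per-request latencies collected during the run. As a reference only, the following Python sketch mirrors the calculation performed in `benchmark.py` (added later in this commit); the function and argument names here are illustrative, not part of the application:

```python
from statistics import median

def summarize(latencies_ms, batch_size, api_type, iterations, total_duration_sec):
    # Latency is the median of the per-request latencies collected during the run (ms).
    latency_ms = median(latencies_ms)
    if api_type == 'sync':
        # Synchronous mode: throughput is derived from the latency and the batch size.
        fps = batch_size * 1000 / latency_ms
    else:
        # Asynchronous mode: throughput is based on the total execution time instead.
        fps = batch_size * iterations / total_duration_sec
    return latency_ms, fps

# Example: 4408 async iterations of batch 1 finished in ~60.15 s -> ~73.3 FPS,
# matching the sample output shown in the Demo Output section below.
```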
+
+Running the application with the `-h` or `--help` option yields the following usage message:
+
+```
+usage: benchmark_app.py [-h] [-i PATH_TO_INPUT] -m PATH_TO_MODEL
+                        [-d TARGET_DEVICE]
+                        [-l PATH_TO_EXTENSION] [-c PATH_TO_CLDNN_CONFIG]
+                        [-api {sync,async}] [-niter NUMBER_ITERATIONS]
+                        [-b BATCH_SIZE]
+                        [-stream_output [STREAM_OUTPUT]] [-t TIME]
+                        [-progress [PROGRESS]] [-nstreams NUMBER_STREAMS]
+                        [-nthreads NUMBER_THREADS] [-pin {YES,NO}]
+                        [--exec_graph_path EXEC_GRAPH_PATH]
+                        [-pc [PERF_COUNTS]]
+
+Options:
+  -h, --help            Show this help message and exit.
+  -i PATH_TO_INPUT, --path_to_input PATH_TO_INPUT
+                        Optional. Path to a folder with images and/or binaries
+                        or to a specific image or binary file.
+  -m PATH_TO_MODEL, --path_to_model PATH_TO_MODEL
+                        Required. Path to an .xml file with a trained model.
+  -d TARGET_DEVICE, --target_device TARGET_DEVICE
+                        Optional. Specify a target device to infer on: CPU,
+                        GPU, FPGA, HDDL or MYRIAD.
+                        Use "-d HETERO:<comma separated devices list>" format to specify the HETERO plugin.
+                        Use "-d MULTI:<comma separated devices list>" format to specify the MULTI plugin.
+                        The application looks for a suitable plugin for the specified device.
+  -l PATH_TO_EXTENSION, --path_to_extension PATH_TO_EXTENSION
+                        Optional. Required for CPU custom layers. Absolute
+                        path to a shared library with the kernel
+                        implementations.
+  -c PATH_TO_CLDNN_CONFIG, --path_to_cldnn_config PATH_TO_CLDNN_CONFIG
+                        Optional. Required for GPU custom kernels. Absolute
+                        path to an .xml file with the kernel descriptions.
+  -api {sync,async}, --api_type {sync,async}
+                        Optional. Enable using sync/async API. Default value
+                        is async.
+  -niter NUMBER_ITERATIONS, --number_iterations NUMBER_ITERATIONS
+                        Optional. Number of iterations. If not specified, the
+                        number of iterations is calculated depending on the
+                        device.
+  -b BATCH_SIZE, --batch_size BATCH_SIZE
+                        Optional. Batch size value. If not specified, the
+                        batch size value is determined from the IR.
+  -stream_output [STREAM_OUTPUT]
+                        Optional. Print progress as plain text. When
+                        specified, an interactive progress bar is replaced
+                        with multiline output.
+  -t TIME, --time TIME  Optional. Time in seconds to execute the topology.
+  -progress [PROGRESS]  Optional. Show progress bar (can affect performance
+                        measurement). Default value is "False".
+  -nstreams NUMBER_STREAMS, --number_streams NUMBER_STREAMS
+                        Optional. Number of streams to use for inference on the CPU/GPU in throughput mode
+                        (for HETERO and MULTI device cases use format <device1>:<nstreams1>,<device2>:<nstreams2> or just <nstreams>).
+                        Default value is determined automatically for a device.
+                        Please note that although the automatic selection usually provides reasonable performance,
+                        it may still be non-optimal in some cases, especially for very small networks.
+  -nthreads NUMBER_THREADS, --number_threads NUMBER_THREADS
+                        Number of threads to use for inference on the CPU
+                        (including HETERO and MULTI cases).
+  -pin {YES,NO}, --infer_threads_pinning {YES,NO}
+                        Optional. Enable ("YES" is the default value) or disable
+                        ("NO") CPU thread pinning for CPU-involved inference.
+  --exec_graph_path EXEC_GRAPH_PATH
+                        Optional. Path to a file where serialized executable
+                        graph information will be stored.
+  -pc [PERF_COUNTS], --perf_counts [PERF_COUNTS]
+                        Optional. Report performance counters.
+
+```
+
+Running the application with an empty list of options yields the usage message given above and an error message.
+
+The application supports topologies with one or more inputs.
+If a topology is not data sensitive, you can skip the input parameter. In this case, inputs are filled with random values.
+If a model has only image input(s), please provide a folder with images or a path to an image as input.
+If a model has some specific input(s) (not images), please prepare binary file(s) filled with data of the appropriate precision and provide a path to them as input.
+If a model has mixed input types, the input folder should contain all required files. Image inputs are filled with image files one by one. Binary inputs are filled with binary files one by one.
+
+To run the demo, you can use public or pre-trained models. To download the pre-trained models, use the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/).
+
+> **NOTE**: Before running the demo with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
+
+For example, to do inference of an image using a trained network with multiple outputs on CPU, run the following command:
+
+```
+python3 benchmark_app.py -i <path_to_image>/inputImage.bmp -m <path_to_model>/multiple-output.xml -d CPU
+```
+
+## Demo Output
+
+The application outputs the number of executed iterations, the total duration of execution, latency, and throughput.
+Additionally, if you set the `-pc` parameter, the application outputs performance counters.
+If you set `--exec_graph_path`, the application stores serialized executable graph information to the specified file.
+
+```
+[Step 8/9] Measuring performance (Start inference asyncronously, 60000 ms duration, 4 inference requests in parallel using 4 streams)
+Progress: |................................| 100.00%
+
+[Step 9/9] Dumping statistics report
+Progress: |................................| 100.00%
+
+Count:      4408 iterations
+Duration:   60153.52 ms
+Latency:    51.8244 ms
+Throughput: 73.28 FPS
+
+```
+
+## See Also
+* [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)
+* [Model Optimizer](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
+* [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader)
diff --git a/tools/benchmark/__init__.py b/tools/benchmark/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/benchmark/__init__.py
diff --git a/tools/benchmark/benchmark.py b/tools/benchmark/benchmark.py
new file mode 100644
index 000000000..dc6d5f819
--- /dev/null
+++ b/tools/benchmark/benchmark.py
@@ -0,0 +1,189 @@
+"""
+ Copyright (C) 2018-2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+""" +from datetime import datetime +from statistics import median +from openvino.inference_engine import IENetwork, IECore, get_version + +from .utils.constants import CPU_DEVICE_NAME, MULTI_DEVICE_NAME, GPU_DEVICE_NAME, MYRIAD_DEVICE_NAME +from .utils.logging import logger +from .utils.utils import get_duration_seconds, parse_value_per_device, parse_devices + + + +class Benchmark: + def __init__(self, device: str, number_infer_requests, number_iterations, duration_seconds, api_type): + self.device = device.upper() + self.ie = IECore() + self.nireq = number_infer_requests + self.niter = number_iterations + self.duration_seconds = get_duration_seconds(duration_seconds, self.niter, self.device) + self.api_type = api_type + self.device_number_streams = {} + + def __del__(self): + del self.ie + + def add_extension(self, path_to_extension: str=None, path_to_cldnn_config: str=None): + if GPU_DEVICE_NAME in self.device: + if path_to_cldnn_config: + self.ie.set_config({'CONFIG_FILE': path_to_cldnn_config}, GPU_DEVICE_NAME) + logger.info('GPU extensions is loaded {}'.format(path_to_cldnn_config)) + if CPU_DEVICE_NAME in self.device or MYRIAD_DEVICE_NAME in self.device: + if path_to_extension: + self.ie.add_extension(extension_path=path_to_extension, device_name=CPU_DEVICE_NAME) + logger.info('CPU extensions is loaded {}'.format(path_to_extension)) + + def get_version_info(self) -> str: + logger.info('InferenceEngine:\n{: <9}{:.<24} {}'.format('', 'API version', get_version())) + version_string = 'Device info\n' + for device, version in self.ie.get_versions(self.device).items(): + version_string += '{: <9}{}\n'.format('', device) + version_string += '{: <9}{:.<24}{} {}.{}\n'.format('', version.description, ' version', version.major, + version.minor) + version_string += '{: <9}{:.<24} {}\n'.format('', 'Build', version.build_number) + return version_string + + @staticmethod + def reshape(ie_network: IENetwork, batch_size: int): + new_shapes = {} + for input_layer_name, input_layer in ie_network.inputs.items(): + shape = input_layer.shape + layout = input_layer.layout + + try: + batch_index = layout.index('N') + except ValueError: + batch_index = 1 if layout == 'C' else -1 + + if batch_index != -1 and shape[batch_index] != batch_size: + shape[batch_index] = batch_size + new_shapes[input_layer_name] = shape + + if new_shapes: + logger.info('Resizing network to batch = {}'.format(batch_size)) + ie_network.reshape(new_shapes) + + def set_config(self, number_streams: int, api_type: str = 'async', + number_threads: int = None, infer_threads_pinning: int = None): + devices = parse_devices(self.device) + self.device_number_streams = parse_value_per_device(devices, number_streams) + for device in devices: + if device == CPU_DEVICE_NAME: # CPU supports few special performance-oriented keys + # limit threading for CPU portion of inference + if number_threads: + self.ie.set_config({'CPU_THREADS_NUM': str(number_threads)}, device) + + if MULTI_DEVICE_NAME in self.device and GPU_DEVICE_NAME in self.device: + self.ie.set_config({'CPU_BIND_THREAD': 'NO'}, CPU_DEVICE_NAME) + else: + # pin threads for CPU portion of inference + self.ie.set_config({'CPU_BIND_THREAD': infer_threads_pinning}, device) + + # for CPU execution, more throughput-oriented execution via streams + # for pure CPU execution, more throughput-oriented execution via streams + if api_type == 'async': + cpu_throughput = {'CPU_THROUGHPUT_STREAMS': 'CPU_THROUGHPUT_AUTO'} + if device in self.device_number_streams.keys(): + 
cpu_throughput['CPU_THROUGHPUT_STREAMS'] = str(self.device_number_streams.get(device)) + self.ie.set_config(cpu_throughput, device) + self.device_number_streams[device] = self.ie.get_config(device, 'CPU_THROUGHPUT_STREAMS') + + elif device == GPU_DEVICE_NAME: + if api_type == 'async': + gpu_throughput = {'GPU_THROUGHPUT_STREAMS': 'GPU_THROUGHPUT_AUTO'} + if device in self.device_number_streams.keys(): + gpu_throughput['GPU_THROUGHPUT_STREAMS'] = str(self.device_number_streams.get(device)) + self.ie.set_config(gpu_throughput, device) + self.device_number_streams[device] = self.ie.get_config(device, 'GPU_THROUGHPUT_STREAMS') + + if MULTI_DEVICE_NAME in self.device and CPU_DEVICE_NAME in self.device: + # multi-device execution with the CPU+GPU performs best with GPU trottling hint, + # which releases another CPU thread (that is otherwise used by the GPU driver for active polling) + self.ie.set_config({'CLDNN_PLUGIN_THROTTLE': '1'}, device) + + elif device == MYRIAD_DEVICE_NAME: + self.ie.set_config({'LOG_LEVEL': 'LOG_INFO', + 'VPU_LOG_LEVEL': 'LOG_WARNING'}, MYRIAD_DEVICE_NAME) + + def load_network(self, ie_network: IENetwork, perf_counts: bool, number_infer_requests: int = None): + config = {'PERF_COUNT': ('YES' if perf_counts else 'NO')} + + exe_network = self.ie.load_network(ie_network, + self.device, + config=config, + num_requests=number_infer_requests or 0) + + return exe_network + + def infer(self, request_queue, requests_input_data, batch_size, progress_bar): + progress_count = 0 + # warming up - out of scope + infer_request = request_queue.get_idle_request() + if not infer_request: + raise Exception('No idle Infer Requests!') + + if self.api_type == 'sync': + infer_request.infer(requests_input_data[infer_request.req_id]) + else: + infer_request.start_async(requests_input_data[infer_request.req_id]) + + request_queue.wait_all() + request_queue.reset_times() + + start_time = datetime.now() + exec_time = (datetime.now() - start_time).total_seconds() + iteration = 0 + + # Start inference & calculate performance + # to align number if iterations to guarantee that last infer requests are executed in the same conditions **/ + while (self.niter and iteration < self.niter) or \ + (self.duration_seconds and exec_time < self.duration_seconds) or \ + (self.api_type == 'async' and iteration % self.nireq): + infer_request = request_queue.get_idle_request() + if not infer_request: + raise Exception('No idle Infer Requests!') + + if self.api_type == 'sync': + infer_request.infer(requests_input_data[infer_request.req_id]) + else: + infer_request.start_async(requests_input_data[infer_request.req_id]) + iteration += 1 + + exec_time = (datetime.now() - start_time).total_seconds() + + if self.duration_seconds: + # calculate how many progress intervals are covered by current iteration. + # depends on the current iteration time and time of each progress interval. + # Previously covered progress intervals must be skipped. 
+ progress_interval_time = self.duration_seconds / progress_bar.total_num + new_progress = int(exec_time / progress_interval_time - progress_count) + progress_bar.add_progress(new_progress) + progress_count += new_progress + elif self.niter: + progress_bar.add_progress(1) + + # wait the latest inference executions + request_queue.wait_all() + + total_duration_sec = request_queue.get_duration_in_seconds() + times = request_queue.times + times.sort() + latency_ms = median(times) + fps = batch_size * 1000 / latency_ms + if self.api_type == 'async': + fps = batch_size * iteration / total_duration_sec + progress_bar.finish() + return fps, latency_ms, total_duration_sec, iteration diff --git a/tools/benchmark/requirements.txt b/tools/benchmark/requirements.txt new file mode 100644 index 000000000..7042cb2a0 --- /dev/null +++ b/tools/benchmark/requirements.txt @@ -0,0 +1,4 @@ +py-cpuinfo +numpy +progress +opencv-python
\ No newline at end of file diff --git a/tools/benchmark/utils/__init__.py b/tools/benchmark/utils/__init__.py new file mode 100644 index 000000000..30917612e --- /dev/null +++ b/tools/benchmark/utils/__init__.py @@ -0,0 +1,15 @@ +""" + Copyright (C) 2018-2019 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" diff --git a/tools/benchmark/utils/constants.py b/tools/benchmark/utils/constants.py new file mode 100644 index 000000000..8ad915bcc --- /dev/null +++ b/tools/benchmark/utils/constants.py @@ -0,0 +1,53 @@ +""" + Copyright (C) 2018-2019 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the 'License'); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an 'AS IS' BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" + +VPU_DEVICE_NAME = 'VPU' +MYRIAD_DEVICE_NAME = 'MYRIAD' +HDDL_DEVICE_NAME = 'HDDL' +FPGA_DEVICE_NAME = 'FPGA' +CPU_DEVICE_NAME = 'CPU' +GPU_DEVICE_NAME = 'GPU' +HETERO_DEVICE_NAME = 'HETERO' +MULTI_DEVICE_NAME = 'MULTI' +UNKNOWN_DEVICE_TYPE = 'UNKNOWN' + +XML_EXTENSION = '.xml' +BIN_EXTENSION = '.bin' + +XML_EXTENSION_PATTERN = '*' + XML_EXTENSION + +IMAGE_EXTENSIONS = ['JPEG', 'JPG', 'PNG', 'BMP'] +BINARY_EXTENSIONS = ['BIN'] + +DEVICE_DURATION_IN_SECS = { + CPU_DEVICE_NAME: 60, + GPU_DEVICE_NAME: 60, + VPU_DEVICE_NAME: 60, + MYRIAD_DEVICE_NAME: 60, + HDDL_DEVICE_NAME: 60, + FPGA_DEVICE_NAME: 120, + UNKNOWN_DEVICE_TYPE: 120 +} + +DEVICE_NIREQ_ASYNC = { + CPU_DEVICE_NAME: 2, + GPU_DEVICE_NAME: 2, + VPU_DEVICE_NAME: 4, + MYRIAD_DEVICE_NAME: 4, + HDDL_DEVICE_NAME: 100, + FPGA_DEVICE_NAME: 3, + UNKNOWN_DEVICE_TYPE: 1 +} diff --git a/tools/benchmark/utils/infer_request_wrap.py b/tools/benchmark/utils/infer_request_wrap.py new file mode 100644 index 000000000..37a757def --- /dev/null +++ b/tools/benchmark/utils/infer_request_wrap.py @@ -0,0 +1,82 @@ +""" + Copyright (C) 2018-2019 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" + +from datetime import datetime +import threading + + +class InferReqWrap: + def __init__(self, request, req_id, callback_queue): + self.req_id = req_id + self.request = request + self.request.set_completion_callback(self.callback, self.req_id) + self.callbackQueue = callback_queue + + def callback(self, status_code, user_data): + if user_data != self.req_id: + print('Request ID {} does not correspond to user data {}'.format(self.req_id, user_data)) + elif status_code: + print('Request {} failed with status code {}'.format(self.req_id, status_code)) + self.callbackQueue(self.req_id, self.request.latency) + + def start_async(self, input_data): + self.request.async_infer(input_data) + + def infer(self, input_data): + self.request.infer(input_data) + self.callbackQueue(self.req_id, self.request.latency) + + +class InferRequestsQueue: + def __init__(self, requests): + self.idleIds = [] + self.requests = [] + self.times = [] + for req_id in range(len(requests)): + self.requests.append(InferReqWrap(requests[req_id], req_id, self.put_idle_request)) + self.idleIds.append(req_id) + self.startTime = datetime.max + self.endTime = datetime.min + self.cv = threading.Condition() + + def reset_times(self): + self.times.clear() + + def get_duration_in_seconds(self): + return (self.endTime - self.startTime).total_seconds() + + def put_idle_request(self, req_id, latency): + self.cv.acquire() + self.times.append(latency) + self.idleIds.append(req_id) + self.endTime = max(self.endTime, datetime.now()) + self.cv.notify() + self.cv.release() + + def get_idle_request(self): + self.cv.acquire() + while len(self.idleIds) == 0: + self.cv.wait() + req_id = self.idleIds.pop() + self.startTime = min(datetime.now(), self.startTime) + self.cv.release() + return self.requests[req_id] + + def wait_all(self): + self.cv.acquire() + while len(self.idleIds) != len(self.requests): + self.cv.wait() + self.cv.release() diff --git a/tools/benchmark/utils/inputs_filling.py b/tools/benchmark/utils/inputs_filling.py new file mode 100644 index 000000000..8dcbee369 --- /dev/null +++ b/tools/benchmark/utils/inputs_filling.py @@ -0,0 +1,189 @@ +""" + Copyright (C) 2018-2019 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" + +import os +import cv2 +import numpy as np + +from glob import glob + +from .constants import IMAGE_EXTENSIONS, BINARY_EXTENSIONS +from .logging import logger + + +def is_image(blob): + if blob.layout != "NCHW": + return False + channels = blob.shape[1] + return channels == 3 + + +def is_image_info(blob): + if blob.layout != "NC": + return False + channels = blob.shape[1] + return channels >= 2 + + +def get_inputs(path_to_input, batch_size, input_info, requests): + input_image_sizes = {} + for key in input_info.keys(): + if is_image(input_info[key]): + input_image_sizes[key] = (input_info[key].shape[2], input_info[key].shape[3]) + logger.info("Network input '{}' precision {}, dimensions ({}): {}".format(key, + input_info[key].precision, + input_info[key].layout, + " ".join(str(x) for x in + input_info[key].shape))) + + images_count = len(input_image_sizes.keys()) + binaries_count = len(input_info) - images_count + + image_files = list() + binary_files = list() + + if path_to_input: + image_files = get_files_by_extensions(path_to_input, IMAGE_EXTENSIONS) + image_files.sort() + binary_files = get_files_by_extensions(path_to_input, BINARY_EXTENSIONS) + binary_files.sort() + + if (len(image_files) == 0) and (len(binary_files) == 0): + logger.warn("No input files were given: all inputs will be filled with random values!") + else: + binary_to_be_used = binaries_count * batch_size * len(requests) + if binary_to_be_used > 0 and len(binary_files) == 0: + logger.warn("No supported binary inputs found! Please check your file extensions: {}".format( + ",".join(BINARY_EXTENSIONS))) + elif binary_to_be_used > len(binary_files): + logger.warn( + "Some binary input files will be duplicated: {} files are required, but only {} were provided".format( + binary_to_be_used, len(binary_files))) + elif binary_to_be_used < len(binary_files): + logger.warn( + "Some binary input files will be ignored: only {} files are required from {}".format(binary_to_be_used, + len(binary_files))) + + images_to_be_used = images_count * batch_size * len(requests) + if images_to_be_used > 0 and len(image_files) == 0: + logger.warn("No supported image inputs found! 
Please check your file extensions: {}".format( + ",".join(IMAGE_EXTENSIONS))) + elif images_to_be_used > len(image_files): + logger.warn( + "Some image input files will be duplicated: {} files are required, but only {} were provided".format( + images_to_be_used, len(image_files))) + elif images_to_be_used < len(image_files): + logger.warn( + "Some image input files will be ignored: only {} files are required from {}".format(images_to_be_used, + len(image_files))) + + requests_input_data = [] + for request_id in range(0, len(requests)): + logger.info("Infer Request {} filling".format(request_id)) + input_data = {} + keys = list(input_info.keys()) + for key in keys: + if is_image(input_info[key]): + # input is image + if (len(image_files) > 0): + input_data[key] = fill_blob_with_image(image_files, request_id, batch_size, keys.index(key), + len(keys), input_info[key].shape) + continue + + # input is binary + if (len(binary_files) > 0): + input_data[key] = fill_blob_with_binary(binary_files, input_info[key].shape) + continue + + # most likely input is image info + if is_image_info(input_info[key]) and len(input_image_sizes) == 1: + image_size = input_image_sizes[list(input_image_sizes.keys()).pop()] + logger.info("Fill input '" + key + "' with image size " + str(image_size[0]) + "x" + + str(image_size[1])) + input_data[key] = fill_blob_with_image_info(image_size, input_info[key].shape) + continue + + # fill with random data + logger.info("Fill input '{}' with random values ({} is expected)".format(key, "image" if is_image( + input_info[key]) else "some binary data")) + input_data[key] = fill_blob_with_random(input_info[key].precision, input_info[key].shape) + + requests_input_data.append(input_data) + + return requests_input_data + + +def get_files_by_extensions(path_to_input, extensions): + input_files = list() + if os.path.isfile(path_to_input): + input_files.append(path_to_input) + else: + path = os.path.join(path_to_input, '*') + files = glob(path, recursive=True) + for file in files: + file_extension = file.rsplit('.').pop().upper() + if file_extension in extensions: + input_files.append(file) + return input_files + + +def fill_blob_with_image(image_paths, request_id, batch_size, input_id, input_size, shape): + images = np.ndarray(shape) + image_index = request_id * batch_size * input_size + input_id + for b in range(batch_size): + image_index %= len(image_paths) + image_filename = image_paths[image_index] + logger.info('Prepare image {}'.format(image_filename)) + image = cv2.imread(image_filename) + + new_im_size = tuple(shape[2:]) + if image.shape[:-1] != new_im_size: + logger.warn("Image is resized from ({}) to ({})".format(image.shape[:-1], new_im_size)) + image = cv2.resize(image, new_im_size) + + image = image.transpose((2, 1, 0)) + images[b] = image + + image_index += input_size + return images + + +def fill_blob_with_image_info(image_size, shape): + im_info = np.ndarray(shape) + for b in range(shape[0]): + for i in range(shape[1]): + im_info[b][i] = image_size[i] if i in [0, 1] else 1 + + return im_info + + +def fill_blob_with_random(precision, shape): + if precision == "FP32": + return np.random.rand(*shape).astype(np.float32) + elif precision == "FP16": + return np.random.rand(*shape).astype(np.float16) + elif precision == "I32": + return np.random.rand(*shape).astype(np.int32) + elif precision == "U8": + return np.random.rand(*shape).astype(np.uint8) + elif precision == "I8": + return np.random.rand(*shape).astype(np.int8) + elif precision == "U16": + return 
np.random.rand(*shape).astype(np.uint16) + elif precision == "I16": + return np.random.rand(*shape).astype(np.int16) + else: + raise Exception("Input precision is not supported: " + precision) diff --git a/tools/benchmark/utils/logging.py b/tools/benchmark/utils/logging.py new file mode 100644 index 000000000..8adf13884 --- /dev/null +++ b/tools/benchmark/utils/logging.py @@ -0,0 +1,21 @@ +""" + Copyright (C) 2018-2019 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" + +import logging +import sys + +logging.basicConfig(format="[ %(levelname)s ] %(message)s", level=logging.INFO, stream=sys.stdout) +logger = logging.getLogger('BenchmarkApp') diff --git a/tools/benchmark/utils/progress_bar.py b/tools/benchmark/utils/progress_bar.py new file mode 100644 index 000000000..1f44efc8e --- /dev/null +++ b/tools/benchmark/utils/progress_bar.py @@ -0,0 +1,65 @@ +""" + Copyright (C) 2018-2019 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" + +from progress.bar import Bar + + +class ProgressBar: + def __init__(self, total_num, stream_output=False, progress_enabled=False): + self.stream_output = stream_output + self.is_finished = True + self.progress_enabled = progress_enabled + self.percent_to_update = 1 + self.cur_progress = 0 + self.total_num = total_num + self.reset(total_num) + + def add_progress(self, num): + self.is_finished = False + if self.progress_enabled: + self.cur_progress += num + total_progress = self.bar.max + if self.cur_progress > total_progress: + self.cur_progress = total_progress + + prev_progress = self.bar.index + prev_percent = 100 * prev_progress / total_progress + cur_percent = 100 * self.cur_progress / total_progress + if prev_progress == 0 or \ + self.cur_progress == total_progress or \ + prev_percent + self.percent_to_update <= cur_percent: + self.bar.next(self.cur_progress - self.bar.index) + if self.stream_output: + print() + + def finish(self, num=0): + if num: + self.add_progress(num) + + self.is_finished = True + if self.progress_enabled: + self.bar.finish() + print() + + def reset(self, total_num): + if self.progress_enabled: + self.bar = Bar('Progress:', max=total_num, fill='.', suffix='%(percent).d%%') + + def new_bar(self, total_num): + if self.is_finished: + self.reset(total_num) + else: + raise Exception('Cannot create a new bar. 
Current bar is still in progress') diff --git a/tools/benchmark/utils/statistics_report.py b/tools/benchmark/utils/statistics_report.py new file mode 100644 index 000000000..daa0490ea --- /dev/null +++ b/tools/benchmark/utils/statistics_report.py @@ -0,0 +1,119 @@ +""" + Copyright (C) 2018-2019 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" +import os +import sys +from enum import Enum + +from .logging import logger + +## statistics reports types +noCntReport = 'no_counters' +averageCntReport = 'average_counters' +detailedCntReport = 'detailed_counters' + +## Responsible for collecting of statistics and dumping to .csv file +class StatisticsReport: + class Config(): + def __init__(self, report_type, report_folder): + self.report_type = report_type + self.report_folder = report_folder + + class Category(Enum): + COMMAND_LINE_PARAMETERS = 0, + RUNTIME_CONFIG = 1, + EXECUTION_RESULTS = 2 + + def __init__(self, config): + self.config = config + self.parameters = {} + self.csv_separator = ';' + + def add_parameters(self, category, parameters): + if category not in self.parameters.keys(): + self.parameters[category] = parameters + else: + self.parameters[category].extend(parameters) + + def dump(self): + def dump_parameters(f, parameters): + for k, v in parameters: + f.write('{}{}{}\n'.format(k, self.csv_separator, v)) + + with open(os.path.join(self.config.report_folder, 'benchmark_report.csv'), 'w') as f: + if self.Category.COMMAND_LINE_PARAMETERS in self.parameters.keys(): + f.write('Command line parameters\n') + dump_parameters(f, self.parameters[self.Category.COMMAND_LINE_PARAMETERS]) + f.write('\n') + + if self.Category.RUNTIME_CONFIG in self.parameters.keys(): + f.write('Configuration setup\n') + dump_parameters(f, self.parameters[self.Category.RUNTIME_CONFIG]) + f.write('\n') + + if self.Category.EXECUTION_RESULTS in self.parameters.keys(): + f.write('Execution results\n') + dump_parameters(f, self.parameters[self.Category.EXECUTION_RESULTS]) + f.write('\n') + + logger.info("Statistics report is stored to {}".format(f.name)) + + def dump_performance_counters_request(self, f, perf_counts): + total = 0 + total_cpu = 0 + f.write(self.csv_separator.join(['layerName', 'execStatus', 'layerType', 'execType', 'realTime (ms)', 'cpuTime (ms)\n'])) + for k, v in sorted(perf_counts.items(), key=lambda x: x[1]['execution_index']): + f.write(self.csv_separator.join([k, v['status'], v['layer_type'], v['exec_type'], str(v['real_time']/1000.0), str(v['cpu_time']/1000.0)])) + f.write('\n') + total += v['real_time'] + total_cpu += v['cpu_time'] + f.write(self.csv_separator.join(['Total','','','',str(total/1000.0),str(total_cpu/1000.0)])) + f.write('\n\n') + + def dump_performance_counters(self, perf_counts): + if self.config.report_type == '' or self.config.report_type == noCntReport: + logger.info("Statistics collecting for performance counters was not requested. No reports are dumped.") + return + + if not perf_counts: + logger.info('Peformance counters are empty. 
No reports are dumped.') + return + + filename = os.path.join(self.config.report_folder, 'benchmark_{}_report.csv'.format(self.config.report_type)) + with open(filename, 'w') as f: + if self.config.report_type == detailedCntReport: + for pc in perf_counts: + self.dump_performance_counters_request(f, pc) + elif self.config.report_type == averageCntReport: + def get_average_performance_counters(perf_counts): + performance_counters_avg = {} + ## iterate over each processed infer request and handle its PM data + for i in range(0, len(perf_counts)): + ## iterate over each layer from sorted vector and add required PM data to the per-layer maps + for k in perf_counts[0].keys(): + if k not in performance_counters_avg.keys(): + performance_counters_avg[k] = perf_counts[i][k] + else: + performance_counters_avg[k]['real_time'] += perf_counts[i][k]['real_time'] + performance_counters_avg[k]['cpu_time'] += perf_counts[i][k]['cpu_time'] + for _, v in performance_counters_avg.items(): + v['real_time'] /= len(perf_counts) + v['cpu_time'] /= len(perf_counts) + return performance_counters_avg + self.dump_performance_counters_request(f, get_average_performance_counters(perf_counts)) + else: + raise Exception('PM data can only be collected for average or detailed report types') + + logger.info('Pefromance counters report is stored to {}'.format(filename)) diff --git a/tools/benchmark/utils/utils.py b/tools/benchmark/utils/utils.py new file mode 100644 index 000000000..8fe49b669 --- /dev/null +++ b/tools/benchmark/utils/utils.py @@ -0,0 +1,248 @@ +""" + Copyright (C) 2018-2019 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" +import os + +from openvino.inference_engine import IENetwork + +from .constants import DEVICE_DURATION_IN_SECS, UNKNOWN_DEVICE_TYPE, DEVICE_NIREQ_ASYNC, BIN_EXTENSION, \ + CPU_DEVICE_NAME, GPU_DEVICE_NAME +from .inputs_filling import is_image +from .logging import logger + + +def static_vars(**kwargs): + def decorate(func): + for k in kwargs: + setattr(func, k, kwargs[k]) + return func + + return decorate + + +@static_vars(step_id=0) +def next_step(additional_info=''): + step_names = { + 1: "Parsing and validating input arguments", + 2: "Loading Inference Engine", + 3: "Reading the Intermediate Representation network", + 4: "Resizing network to match image sizes and given batch", + 5: "Configuring input of the model", + 6: "Setting device configuration", + 7: "Loading the model to the device", + 8: "Setting optimal runtime parameters", + 9: "Creating infer requests and filling input blobs with images", + 10: "Measuring performance", + 11: "Dumping statistics report", + } + + next_step.step_id += 1 + if next_step.step_id not in step_names.keys(): + raise Exception('Step ID {} is out of total steps number '.format(next_step.step_id, str(len(step_names)))) + + step_info_template = '[Step {}/{}] {}' + step_name = step_names[next_step.step_id] + (' ({})'.format(additional_info) if additional_info else '') + step_info_template = step_info_template.format(next_step.step_id, len(step_names), step_name) + print(step_info_template) + + +def read_network(path_to_model: str): + xml_filename = os.path.abspath(path_to_model) + head, tail = os.path.splitext(xml_filename) + bin_filename = os.path.abspath(head + BIN_EXTENSION) + + ie_network = IENetwork(xml_filename, bin_filename) + + input_info = ie_network.inputs + + if not input_info: + raise AttributeError('No inputs info is provided') + + return ie_network + + +def config_network_inputs(ie_network: IENetwork): + input_info = ie_network.inputs + + for key in input_info.keys(): + if is_image(input_info[key]): + # Set the precision of input data provided by the user + # Should be called before load of the network to the plugin + input_info[key].precision = 'U8' + + +def get_number_iterations(number_iterations: int, nireq: int, api_type: str): + niter = number_iterations + + if api_type == 'async' and niter: + niter = int((niter + nireq - 1) / nireq) * nireq + if number_iterations != niter: + logger.warn('Number of iterations was aligned by request number ' + 'from {} to {} using number of requests {}'.format(number_iterations, niter, nireq)) + + return niter + + +def get_duration_seconds(time, number_iterations, device): + if time: + # time limit + return time + + if not number_iterations: + return get_duration_in_secs(device) + return 0 + + +def get_duration_in_milliseconds(duration): + return duration * 1000 + + +def get_duration_in_secs(target_device): + duration = 0 + for device in DEVICE_DURATION_IN_SECS: + if device in target_device: + duration = max(duration, DEVICE_DURATION_IN_SECS[device]) + + if duration == 0: + duration = DEVICE_DURATION_IN_SECS[UNKNOWN_DEVICE_TYPE] + logger.warn('Default duration {} seconds is used for unknown device {}'.format(duration, target_device)) + + return duration + + +def get_nireq(target_device): + nireq = 0 + for device in DEVICE_NIREQ_ASYNC: + if device in target_device: + nireq = max(nireq, DEVICE_NIREQ_ASYNC[device]) + + if nireq == 0: + nireq = DEVICE_NIREQ_ASYNC[UNKNOWN_DEVICE_TYPE] + logger.warn('Default number of requests {} is used for unknown device {}'.format(nireq, target_device)) + + return nireq 
+ + +def parse_devices(device_string): + devices = device_string + if ':' in devices: + devices = devices.partition(':')[2] + return [d[:d.index('(')] if '(' in d else d for d in devices.split(',')] + + +def parse_value_per_device(devices, values_string): + # Format: <device1>:<value1>,<device2>:<value2> or just <value> + result = {} + if not values_string: + return result + device_value_strings = values_string.upper().split(',') + for device_value_string in device_value_strings: + device_value_vec = device_value_string.split(':') + if len(device_value_vec) == 2: + for device in devices: + if device == device_value_vec[0]: + value = int(device_value_vec[1]) + result[device_value_vec[0]] = value + break + elif len(device_value_vec) == 1: + value = int(device_value_vec[0]) + for device in devices: + result[device] = value + elif not device_value_vec: + raise Exception('Unknown string format: ' + values_string) + return result + + +def process_help_inference_string(benchmark_app): + output_string = 'Start inference {}ronously'.format(benchmark_app.api_type) + if benchmark_app.api_type == 'async': + output_string += ', {} inference requests'.format(benchmark_app.nireq) + + device_ss = '' + if CPU_DEVICE_NAME in benchmark_app.device: + device_ss += str(benchmark_app.ie.get_config(CPU_DEVICE_NAME, 'CPU_THROUGHPUT_STREAMS')) + device_ss += ' streams for {}'.format(CPU_DEVICE_NAME) + if GPU_DEVICE_NAME in benchmark_app.device: + device_ss += ', ' if device_ss else '' + device_ss += str(benchmark_app.ie.get_config(GPU_DEVICE_NAME, 'GPU_THROUGHPUT_STREAMS')) + device_ss += ' streams for {}'.format(GPU_DEVICE_NAME) + + if device_ss: + output_string += ' using ' + device_ss + + limits = '' + + if benchmark_app.niter and not benchmark_app.duration_seconds: + limits += '{} iterations'.format(benchmark_app.niter) + + if benchmark_app.duration_seconds: + limits += '{} ms duration'.format(get_duration_in_milliseconds(benchmark_app.duration_seconds)) + if limits: + output_string += ', limits: ' + limits + + return output_string + + +def dump_exec_graph(exe_network, exec_graph_path): + try: + exec_graph_info = exe_network.get_exec_graph_info() + exec_graph_info.serialize(exec_graph_path) + logger.info('Executable graph is stored to {}'.format(exec_graph_path)) + del exec_graph_info + except Exception as e: + logger.exception(e) + + +def print_perf_counters(perf_counts_list): + for ni in range(len(perf_counts_list)): + perf_counts = perf_counts_list[ni] + total_time = 0 + total_time_cpu = 0 + logger.info("Performance counts for {}-th infer request".format(ni)) + for layer, stats in sorted(perf_counts.items(), key=lambda x: x[1]['execution_index']): + max_layer_name = 30 + print("{:<30}{:<15}{:<30}{:<20}{:<20}{:<20}".format( + layer[:max_layer_name - 4] + '...' 
+                      if (len(layer) >= max_layer_name) else layer,
+                      stats['status'],
+                      'layerType: ' + str(stats['layer_type']),
+                      'realTime: ' + str(stats['real_time']),
+                      'cpu: ' + str(stats['cpu_time']),
+                      'execType: ' + str(stats['exec_type'])))
+            total_time += stats['real_time']
+            total_time_cpu += stats['cpu_time']
+        print('Total time: {} microseconds'.format(total_time))
+        print('Total CPU time: {} microseconds\n'.format(total_time_cpu))
+
+def get_command_line_arguments(argv):
+    parameters = []
+    arg_name = ''
+    arg_value = ''
+    for arg in argv[1:]:
+        if '=' in arg:
+            arg_name, arg_value = arg.split('=')
+            parameters.append((arg_name, arg_value))
+            arg_name = ''
+            arg_value = ''
+        else:
+            if arg[0] == '-':
+                if arg_name != '':
+                    parameters.append((arg_name, arg_value))
+                    arg_value = ''
+                arg_name = arg
+            else:
+                arg_value = arg
+    if arg_name != '':
+        parameters.append((arg_name, arg_value))
+    return parameters
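The command-line entry point (`benchmark_app.py`) is not part of this diff. For orientation only, the sketch below shows one way the modules added above could be wired together. It is a minimal sketch under assumptions: import paths assume the repository root is on `PYTHONPATH`, the `exe_network.requests` attribute is assumed from the Inference Engine Python API, and all paths and parameter values are illustrative placeholders.

```python
from tools.benchmark.benchmark import Benchmark
from tools.benchmark.utils.infer_request_wrap import InferRequestsQueue
from tools.benchmark.utils.inputs_filling import get_inputs
from tools.benchmark.utils.progress_bar import ProgressBar
from tools.benchmark.utils.utils import read_network, config_network_inputs

# Illustrative values; a real run would take these from the command line.
device, batch_size, nireq = 'CPU', 1, 2

benchmark = Benchmark(device, number_infer_requests=nireq, number_iterations=None,
                      duration_seconds=None, api_type='async')

ie_network = read_network('<path_to_model>/model.xml')   # reads the .xml and matching .bin
Benchmark.reshape(ie_network, batch_size)                 # align the batch dimension
config_network_inputs(ie_network)                         # U8 precision for image inputs
benchmark.set_config(number_streams=None, api_type='async',
                     number_threads=None, infer_threads_pinning='YES')

exe_network = benchmark.load_network(ie_network, perf_counts=False,
                                     number_infer_requests=nireq)
# 'requests' is assumed to expose the created infer requests of the loaded network.
request_queue = InferRequestsQueue(exe_network.requests)
requests_input_data = get_inputs('<path_to_input>', batch_size,
                                 ie_network.inputs, request_queue.requests)

progress_bar = ProgressBar(total_num=1000, stream_output=False, progress_enabled=False)
fps, latency_ms, total_duration_sec, iterations = benchmark.infer(
    request_queue, requests_input_data, batch_size, progress_bar)
print('Latency: {:.2f} ms, throughput: {:.2f} FPS over {} iterations'.format(
    latency_ms, fps, iterations))
```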