| field | value | date |
|---|---|---|
| author | Alexey Suhov <asuhov@users.noreply.github.com> | 2018-11-23 16:19:43 +0300 |
| committer | openvino-pushbot <44090433+openvino-pushbot@users.noreply.github.com> | 2018-11-23 16:19:43 +0300 |
| commit | 55a41d7570f78aaea0d6764d157dd7434730d56f (patch) | |
| tree | ba022c71609b93d51119bcb25e5ccb8c7147dbd3 /inference-engine/samples/calibration_tool | |
| parent | 54eab180361ec09fbd82e2bb62adfeb521275774 (diff) | |
Publishing R4 (#41)
* Publishing R4
Diffstat (limited to 'inference-engine/samples/calibration_tool')
9 files changed, 2256 insertions, 0 deletions
diff --git a/inference-engine/samples/calibration_tool/CMakeLists.txt b/inference-engine/samples/calibration_tool/CMakeLists.txt
new file mode 100644
index 000000000..7ec85ed42
--- /dev/null
+++ b/inference-engine/samples/calibration_tool/CMakeLists.txt
@@ -0,0 +1,68 @@
+# Copyright (c) 2018 Intel Corporation
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#      http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+cmake_minimum_required(VERSION 2.8)
+
+set (TARGET_NAME "calibration_tool")
+
+file (GLOB MAIN_SRC
+        ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/../validation_app/pugixml/*.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/../validation_app/ClassificationProcessor.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/../validation_app/classification_set_generator.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/../validation_app/image_decoder.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/../validation_app/ObjectDetectionProcessor.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/../validation_app/Processor.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/../validation_app/VOCAnnotationParser.cpp
+        )
+
+file (GLOB MAIN_HEADERS
+        ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/pugixml/*.hpp
+        )
+
+# Create named folders for the sources within the .vcproj
+# Empty name lists them directly under the .vcproj
+source_group("src" FILES ${MAIN_SRC})
+source_group("include" FILES ${MAIN_HEADERS})
+
+# OpenCV include folders
+find_package(OpenCV QUIET COMPONENTS core imgproc highgui imgcodecs)
+if(NOT(OpenCV_FOUND))
+    find_package(OpenCV QUIET COMPONENTS world)
+    if(NOT(OpenCV_FOUND))
+        message(WARNING "No suitable OpenCV version detected, " ${TARGET_NAME} " skipped")
+        return()
+    endif()
+endif()
+
+# Properties->C/C++->General->Additional Include Directories
+include_directories (${CMAKE_CURRENT_SOURCE_DIR}/../classification_sample/core
+        ${CMAKE_CURRENT_SOURCE_DIR}/../common
+        ${CMAKE_CURRENT_SOURCE_DIR}/../common/os/windows
+        ${CMAKE_CURRENT_SOURCE_DIR}/../../include
+        ${OpenCV_INCLUDE_DIRS}
+        ${CMAKE_CURRENT_SOURCE_DIR}/../validation_app)
+
+link_directories(${LIB_FOLDER})
+
+# Create the executable from sources
+add_executable(${TARGET_NAME} ${MAIN_SRC} ${MAIN_HEADERS})
+
+set_target_properties(${TARGET_NAME} PROPERTIES "CMAKE_CXX_FLAGS" "${CMAKE_CXX_FLAGS} -fPIE"
+COMPILE_PDB_NAME ${TARGET_NAME})
+target_link_libraries(${TARGET_NAME} gflags IE::ie_cpu_extension ${InferenceEngine_LIBRARIES} ${OpenCV_LIBRARIES})
+if (UNIX)
+    target_link_libraries(${TARGET_NAME} dl)
+endif()
+
diff --git a/inference-engine/samples/calibration_tool/README.md b/inference-engine/samples/calibration_tool/README.md
new file mode 100644
index 000000000..b05b11bfd
--- /dev/null
+++ b/inference-engine/samples/calibration_tool/README.md
@@ -0,0 +1,103 @@
+# Calibration Tool
+
+The Inference Engine Calibration Tool calibrates a given FP32 model so that it can be run in low-precision 8-bit integer
+mode while keeping the input data of this model in the original precision.
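+
+The calibrated model is written back as a regular IR (see the `-output` option), so it can be loaded
+like any other model. Below is a minimal, hypothetical sketch using the legacy Inference Engine API that
+this sample is built on; the file names and device are placeholder assumptions:
+
+```cpp
+#include <inference_engine.hpp>
+
+int main() {
+    using namespace InferenceEngine;
+
+    // Read the calibrated IR produced by the Calibration Tool (placeholder names)
+    CNNNetReader reader;
+    reader.ReadNetwork("model_i8.xml");
+    reader.ReadWeights("model_i8.bin");
+
+    // Load the network on a device; the plugin reads the statistics and
+    // quantization_level hints embedded in the IR and runs eligible layers in int8
+    InferencePlugin plugin = PluginDispatcher({""}).getPluginByDevice("CPU");
+    ExecutableNetwork executable = plugin.LoadNetwork(reader.getNetwork(), {});
+    InferRequest request = executable.CreateInferRequest();
+
+    // In real use, fill the input blobs via request.GetBlob(<input_name>) first
+    request.Infer();
+    return 0;
+}
+```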
+
+## Calibration Tool Options
+
+The core command-line options for the Calibration Tool are the same as for the
+[Validation Application](./samples/validation_app/README.md). However, the Calibration Tool has the following specific options: `-t`, `-subset`, `-output`, and `-threshold`.
+
+Running the Calibration Tool with the `-h` option yields the following usage message with all CLI options listed:
+```sh
+Usage: calibration_tool [OPTION]
+
+Available options:
+
+    -h                        Print a help message
+    -t <type>                 Type of an inferred network ("C" by default)
+      -t "C" to calibrate a Classification network and write the calibrated network to IR
+      -t "OD" to calibrate an Object Detection network and write the calibrated network to IR
+      -t "RawC" to collect only statistics for a Classification network and write the statistics to IR. With this option, a model is not calibrated. For calibration and statistics collection, use "-t C" instead.
+      -t "RawOD" to collect only statistics for an Object Detection network and write the statistics to IR. With this option, a model is not calibrated. For calibration and statistics collection, use "-t OD" instead
+    -i <path>                 Required. Path to a directory with validation images. For Classification models, the directory must contain folders named as labels with images inside or a .txt file with a list of images. For Object Detection models, the dataset must be in VOC format.
+    -m <path>                 Required. Path to an .xml file with a trained model, including model name and extension.
+    -l <absolute_path>        Required for CPU custom layers. Absolute path to a shared library with the kernel implementations.
+    -c <absolute_path>        Required for GPU custom kernels. Absolute path to an .xml file with the kernel descriptions.
+    -d <device>               Target device to infer on: CPU (default), GPU, FPGA, or MYRIAD. The application looks for a suitable plugin for the specified device.
+    -b N                      Batch size value. If not specified, the batch size value is taken from IR
+    -ppType <type>            Preprocessing type. Options: "None", "Resize", "ResizeCrop"
+    -ppSize N                 Preprocessing size (used with ppType="ResizeCrop")
+    -ppWidth W                Preprocessing width (overrides -ppSize, used with ppType="ResizeCrop")
+    -ppHeight H               Preprocessing height (overrides -ppSize, used with ppType="ResizeCrop")
+    --dump                    Dump file names and inference results to a .csv file
+    -subset                   Number of pictures from the whole validation set to create the calibration dataset. Default value is 0, which stands for the whole provided dataset
+    -output <output_IR>       Output name for the calibrated model. Default is <original_model_name>_i8.xml|bin
+    -threshold                Threshold for a maximum accuracy drop of the quantized model. Must be an integer number (percents) without a percent sign. Default value is 1, which stands for an accepted accuracy drop of 1%
+
+    Classification-specific options:
+      -Czb true               "Zero is a background" flag. Some networks are trained with a modified dataset where the class IDs are enumerated from 1, but 0 is an undefined "background" class (which is never detected)
+
+    Object detection-specific options:
+      -ODkind <kind>          Type of an Object Detection model. Options: SSD
+      -ODa <path>             Required for Object Detection models. Path to a directory containing an .xml file with annotations for images.
+      -ODc <file>             Required for Object Detection models. Path to a file with a list of classes
+      -ODsubdir <name>        Directory between the path to images (specified with -i) and image name (specified in the .xml file). For the VOC2007 dataset, use JPEGImages.
+```
+
+The tool options are divided into two categories:
+1. **Common options**, named with a single letter or a word, such as `-b` or `--dump`.
+   These options are the same in all Calibration Tool modes.
+2. **Network type-specific options**, named as an acronym of the network type (`C` or `OD`)
+   followed by a letter or a word.
+
+
+## Calibrate a Classification Model
+
+To calibrate a classification convolutional neural network (CNN)
+on a subset of images (first 2000 images) from the given dataset (specified with the `-i` option), run the following command:
+
+```bash
+./calibration_tool -t C -i <path_to_images_directory_or_txt_file> -m <path_to_classification_model>/<model_name>.xml -d <CPU|GPU> -subset 2000
+```
+
+The dataset must have the correct format. Classification models support two formats: folders
+named as labels that contain all images of this class, and an ImageNet*-like format with a
+`.txt` file containing a list of images and the IDs of their classes.
+
+For more information on the structure of the datasets, refer to the **Prepare a Dataset** section of the
+[Validation Application document](./samples/validation_app/README.md).
+
+If you decide to use a subset of the given dataset, use the ImageNet-like format
+instead of the "folders as classes" format. This gives a more accurate calibration, because you are more likely to get images
+representing different classes.
+
+For example, to calibrate the pretrained TensorFlow\* `inception_v4_tf.xml` classification model,
+run the following command:
+
+```bash
+./calibration_tool -t C -m inception_v4_tf.xml -i ILSVRC2012_val.txt -Czb false -ppType "ResizeCrop" -ppSize 342 -b 1 -d CPU -subset 2000
+```
+
+## Calibrate an Object Detection Model
+
+This topic demonstrates how to run the Calibration Tool on an Object Detection CNN on a set of images. Please
+review the list of Object Detection models used for validation of the Calibration Tool
+in the [8-bit Inference Introduction](./docs/Inference_Engine_Developer_Guide/Int8Inference.md).
+Any network that can be inferred with the Inference Engine and has the same input and output
+format as the SSD CNN should be supported as well.
+
+### Run an SSD Network on the VOC Dataset
+
+Before you start calibrating the model, make sure your dataset is in the correct format. For more information,
+refer to the **Prepare a Dataset** section of the
+[Validation Application document](./samples/validation_app/README.md).
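+
+For orientation, a VOC2007-style dataset typically has the layout sketched below (standard VOC
+directory names; the root is what you pass with `-i`, and `JPEGImages` is what you pass with `-ODsubdir`):
+
+```sh
+VOCdevkit/
+└── VOC2007/
+    ├── Annotations/   # one .xml annotation file per image (pass this directory with -ODa)
+    └── JPEGImages/    # the JPEG images referenced by the annotations
+```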
+
+Once you have prepared the dataset, you can calibrate the model on it by running the following command:
+```bash
+./calibration_tool -d CPU -t OD -ODa "<path_to_image_annotations>/VOCdevkit/VOC2007/Annotations" -i "<path_to_image_directory>/VOCdevkit" -m "<path_to_model>/vgg_voc0712_ssd_300x300.xml" -ODc "<path_to_classes_list>/VOC_SSD_Classes.txt" -ODsubdir JPEGImages -subset 500
+```
+
+## See Also
+
+* [Using Inference Engine Samples](./docs/Inference_Engine_Developer_Guide/Samples_Overview.md)
diff --git a/inference-engine/samples/calibration_tool/calibrator_processors.cpp b/inference-engine/samples/calibration_tool/calibrator_processors.cpp
new file mode 100644
index 000000000..21ce5f160
--- /dev/null
+++ b/inference-engine/samples/calibration_tool/calibrator_processors.cpp
@@ -0,0 +1,847 @@
+// Copyright (C) 2018 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "calibrator_processors.h"
+#include <string>       // std::string
+#include <iostream>     // std::cout
+#include <sstream>      // std::stringstream
+#include <iomanip>
+#include <algorithm>
+#include <map>
+#include <memory>
+#include <utility>
+#include <list>
+#include "details/ie_cnn_network_tools.h"
+#include "details/caseless.hpp"
+
+using namespace InferenceEngine;
+using namespace InferenceEngine::details;
+
+using InferenceEngine::details::InferenceEngineException;
+
+// Inserts an FP32 ScaleShift layer between the producer of beforeLayer's input
+// port and beforeLayer itself
+CNNLayerPtr Int8Calibrator::addScaleShiftBeforeLayer(std::string name, CNNLayer::Ptr beforeLayer, size_t port, std::vector<float> scale) {
+    if (beforeLayer->insData.size() <= port) {
+        THROW_IE_EXCEPTION << "cannot find appropriate port for addScaleShiftBeforeLayer";
+    }
+
+    DataPtr pData = beforeLayer->insData[port].lock();
+    LayerParams params;
+    params.name = name;
+    params.precision = Precision::FP32;
+    params.type = "ScaleShift";
+    CNNLayerPtr lptr = std::make_shared<ScaleShiftLayer>(params);
+    ScaleShiftLayer *pScaleShift = dynamic_cast<ScaleShiftLayer *>(lptr.get());
+
+    SizeVector wdims({ pData->dims[2] });
+
+    // A single scale value is broadcast to all channels
+    if (scale.size() == 1) {
+        scale.resize(wdims[0]);
+        for (int i = 1; i < wdims[0]; i++) {
+            scale[i] = scale[0];
+        }
+    }
+
+    if (scale.size() != pData->dims[2]) {
+        THROW_IE_EXCEPTION << "Failed to add ScaleShift before " << beforeLayer->name << " due to inconsistency between the scales and the layer output dims";
+    }
+
+    Blob::Ptr weights = nullptr;
+    weights = make_shared_blob<float>(Precision::FP32, Layout::C, wdims);
+    weights->allocate();
+    float *buffer = weights->buffer().as<float *>();
+    if (buffer == nullptr) {
+        THROW_IE_EXCEPTION << "Could not allocate weights buffer";
+    }
+    for (size_t i = 0; i < pData->dims[2]; i++) {
+        buffer[i] = scale[i];
+    }
+    pScaleShift->_weights = weights;
+
+    SizeVector bdims({ pData->dims[2] });
+    Blob::Ptr biases = nullptr;
+    biases = make_shared_blob<float>(Precision::FP32, Layout::C, bdims);
+    biases->allocate();
+    buffer = biases->buffer().as<float *>();
+    for (size_t i = 0; i < pData->dims[2]; i++) {
+        buffer[i] = 0.f;
+    }
+    pScaleShift->_biases = biases;
+
+    Data *edge2 = new Data(*pData.get());
+    DataPtr newEdge(edge2);
+    lptr->insData.push_back(pData);
+    lptr->outData.push_back(newEdge);
+    newEdge->name = /*"EdgeAfter_" +*/ params.name;
+    newEdge->creatorLayer = lptr;
+    newEdge->inputTo.clear();
+    newEdge->inputTo[beforeLayer->name] = beforeLayer;
+
+    pData->inputTo.erase(beforeLayer->name);
+    pData->inputTo[params.name] = lptr;
+
+    for (size_t i = 0; i < beforeLayer->insData.size(); i++) {
+        DataPtr d = beforeLayer->insData[i].lock();
+        if (d == pData) {
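+            // Re-point this input edge of the consumer layer to the ScaleShift output,
+            // so the inserted layer now sits between the input and the original layer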
beforeLayer->insData[i] = newEdge; + break; + } + } + return lptr; +} + + +float Int8Calibrator::compare_NRMSD(InferenceEngine::Blob::Ptr res, InferenceEngine::Blob::Ptr ref) { + float *res_ptr = res->buffer().as<float *>(); + size_t res_size = res->size(); + + float *ref_ptr = ref->buffer().as<float *>(); + size_t ref_size = ref->size(); + + float sum = 0; + + float mmin = ref_ptr[0], mmax = ref_ptr[0]; + + for (size_t i = 0; i < ref_size; i++) { + float sqr = (ref_ptr[i] - res_ptr[i]); + sqr *= sqr; + sum += sqr; + + mmin = std::min(mmin, ref_ptr[i]); + mmax = std::max(mmax, ref_ptr[i]); + } + sum /= ref_size; + + sum = pow(sum, 0.5); + + sum /= mmax - mmin; + + return sum; +} + + +InferenceEngine::NetworkStatsMap Int8Calibrator::getStatistic(float threshold) { + InferenceEngine::NetworkStatsMap netNodesStats; + // go over all outputs and get aggregated statistics + for (auto l : _statData.registeredLayers()) { + NetworkNodeStatsPtr nodeStats; + size_t channels = _statData.getNumberChannels(l); + if (netNodesStats.find(l) == netNodesStats.end()) { + nodeStats = NetworkNodeStatsPtr(new NetworkNodeStats(channels)); + + netNodesStats[l] = nodeStats; + } else { + nodeStats = netNodesStats[l]; + } + for (size_t c = 0; c < channels; c++) { + _statData.getDataMinMax(l, c, nodeStats->_minOutputs[c], nodeStats->_maxOutputs[c], threshold); + } + } + return netNodesStats; +} + + +void Int8Calibrator::collectFP32Statistic() { + _collectByLayer = false; + _collectStatistic = true; + + networkReaderC = InferenceEngine::CNNNetReader(); + networkReaderC.ReadNetwork(_modelFileNameI8C); + if (!networkReaderC.isParseSuccess()) THROW_IE_EXCEPTION << "cannot load a failed Model"; + if (_cBatch == 0) { + // Zero means "take batch value from the IR" + _cBatch = networkReaderC.getNetwork().getBatchSize(); + } else { + // Not zero means "use the specified value" + networkReaderC.getNetwork().setBatchSize(_cBatch); + } + + /** Extract model name and load weights **/ + std::string binFileName = fileNameNoExt(_modelFileNameI8C) + ".bin"; + networkReaderC.ReadWeights(binFileName.c_str()); + + auto network = networkReaderC.getNetwork(); + + + std::vector<CNNLayerPtr> layersAfterInputs; + + std::string hackPrefix = "scaleshifted_input:"; + + for (auto &&layer : network) { + if (layer->insData.size() > 0) { + std::string inName = layer->input()->getName(); + for (auto &&input : network.getInputsInfo()) { + if (inName == input.first) { + layersAfterInputs.push_back(layer); + _inputsFromLayers[hackPrefix + layer->name] = inName; + } + } + } + } + + for (auto &&layer : layersAfterInputs) { + std::string firstInputName = hackPrefix + layer->name; + auto scaleShiftLayer = addScaleShiftBeforeLayer(firstInputName, layer, 0, { 1.f }); + ((ICNNNetwork&)network).addLayer(scaleShiftLayer); + } + + + // 1. 
add all layers as output one + for (auto &&layer : network) { + std::string layerType = network.getLayerByName(layer->name.c_str())->type; + if (/*layerType != "Split" &&*/layerType != "Input") { + network.addOutput(layer->name); + } + _statData.registerLayer(layer->name); + } + + ExecutableNetwork executable_network = _pluginI8C.LoadNetwork(network, { { CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS), CONFIG_VALUE(YES) } }); + _inferRequestI8C = executable_network.CreateInferRequest(); +} + +void Int8Calibrator::validateInt8Config(const InferenceEngine::NetworkStatsMap &stat, + const std::map<std::string, bool> &layersToInt8) { + _collectByLayer = false; + _collectStatistic = false; + networkReaderC = InferenceEngine::CNNNetReader(); + networkReaderC.ReadNetwork(_modelFileNameI8C); + if (!networkReaderC.isParseSuccess()) THROW_IE_EXCEPTION << "cannot load a failed Model"; + if (_cBatch == 0) { + // Zero means "take batch value from the IR" + _cBatch = networkReaderC.getNetwork().getBatchSize(); + } else { + // Not zero means "use the specified value" + networkReaderC.getNetwork().setBatchSize(_cBatch); + } + + /** Extract model name and load weights **/ + std::string binFileName = fileNameNoExt(_modelFileNameI8C) + ".bin"; + networkReaderC.ReadWeights(binFileName.c_str()); + + // Initialize statistic + ICNNNetworkStats *pstats = nullptr; + StatusCode s = ((ICNNNetwork&)networkReaderC.getNetwork()).getStats(&pstats, nullptr); + if (s == StatusCode::OK && pstats) { + pstats->setNodesStats(stat); + } + + auto network = networkReaderC.getNetwork(); + for (auto l : layersToInt8) { + network.getLayerByName(l.first.c_str())-> + params["quantization_level"] = (l.second == false) ? "FP32" : "I8"; + } + + ExecutableNetwork executable_network = _pluginI8C.LoadNetwork(network, { { CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS), CONFIG_VALUE(YES) } }); + _inferRequestI8C = executable_network.CreateInferRequest(); +} + +CNNNetwork Int8Calibrator::createICNNNetworkForLayer(CNNLayer::Ptr layerToClone) { + CNNLayer::Ptr layerRelU = layerToClone->outData[0]->inputTo.begin()->second; + + InferenceEngine::CNNNetReader reader1; + std::string inpuitName = layerToClone->insData[0].lock()->name; + std::string model = "<net name=\"L\" version=\"2\" batch=\"1\"><layers> " \ + "<layer name=\"" + + inpuitName + + "\" type=\"Input\" precision=\"FP32\" id=\"0\"> "\ + "<output>"\ + "<port id=\"0\">"\ + "<dim>1</dim>"\ + "<dim>3</dim>"\ + "<dim>224</dim>"\ + "<dim>224</dim>"\ + "</port>"\ + "</output>"\ + "</layer>" \ + "<layer name=\"" + + layerToClone->name + + "\" type=\"Convolution\" precision=\"FP32\" id=\"1\">" \ + "<convolution_data stride-x=\"2\" stride-y=\"2\" pad-x=\"3\" pad-y=\"3\" kernel-x=\"7\" kernel-y=\"7\" output=\"64\" group=\"1\" />"\ + "<input>"\ + "<port id=\"1\">"\ + "<dim>1</dim>"\ + "<dim>3</dim>"\ + "<dim>224</dim>"\ + "<dim>224</dim>"\ + "</port>"\ + "</input>"\ + "<output>"\ + "<port id=\"2\">"\ + "<dim>1</dim>"\ + "<dim>64</dim>"\ + "<dim>112</dim>"\ + "<dim>112</dim>"\ + "</port>"\ + "</output>"\ + "</layer>"\ + "<layer name=\"" + + layerRelU->name + + "\" type=\"ReLU\" precision=\"FP32\" id=\"2\">"\ + "<input>" + "<port id=\"3\">"\ + "<dim>1</dim>"\ + "<dim>64</dim>"\ + "<dim>112</dim>"\ + "<dim>112</dim>"\ + "</port>"\ + "</input>"\ + "<output>"\ + "<port id=\"4\">"\ + "<dim>1</dim>"\ + "<dim>64</dim>"\ + "<dim>112</dim>"\ + "<dim>112</dim>"\ + "</port>"\ + "</output>"\ + "</layer>"\ + "<layer name=\"" + + layerToClone->name + + "_\" type=\"ScaleShift\" precision=\"FP32\" id=\"3\">"\ + "<input>" + "<port 
id=\"5\">"\ + "<dim>1</dim>"\ + "<dim>64</dim>"\ + "<dim>112</dim>"\ + "<dim>112</dim>"\ + "</port>"\ + "</input>"\ + "<output>"\ + "<port id=\"6\">"\ + "<dim>1</dim>"\ + "<dim>64</dim>"\ + "<dim>112</dim>"\ + "<dim>112</dim>"\ + "</port>"\ + "</output>"\ + "</layer>"\ + "</layers> <edges>"\ + "<edge from-layer=\"0\" from-port=\"0\" to-layer=\"1\" to-port=\"1\"/> "\ + "<edge from-layer=\"1\" from-port=\"2\" to-layer=\"2\" to-port=\"3\"/> "\ + "<edge from-layer=\"2\" from-port=\"4\" to-layer=\"3\" to-port=\"5\"/> "\ + "</edges></net>"; + + reader1.ReadNetwork(model.c_str(), model.length()); + ICNNNetwork &n = reader1.getNetwork(); + + InferenceEngine::InputsDataMap inputs; + n.getInputsInfo(inputs); + CNNLayerPtr inputLayer = inputs.begin()->second->getInputData()->creatorLayer.lock(); + + CNNLayerPtr convLayer; + n.getLayerByName(layerToClone->name.c_str(), convLayer, nullptr); + ConvolutionLayer *pConvS = dynamic_cast<ConvolutionLayer *>(layerToClone.get()); + ConvolutionLayer *pConvT = dynamic_cast<ConvolutionLayer *>(convLayer.get()); + pConvT->_kernel_x = pConvS->_kernel_x; + pConvT->_kernel_y = pConvS->_kernel_y; + pConvT->_stride_x = pConvS->_stride_x; + pConvT->_stride_y = pConvS->_stride_y; + pConvT->_out_depth = pConvS->_out_depth; + pConvT->_padding_x = pConvS->_padding_x; + pConvT->_padding_y = pConvS->_padding_y; + pConvT->_dilation_x = pConvS->_dilation_x; + pConvT->_dilation_y = pConvS->_dilation_y; + pConvT->_group = pConvS->_group; + pConvT->_weights = pConvS->_weights; + pConvT->_biases = pConvS->_biases; + pConvT->blobs = pConvS->blobs; + + std::shared_ptr<Data> cur = layerToClone->insData[0].lock(); + if (cur == nullptr) { + THROW_IE_EXCEPTION << "[Samples] shared ptr layerToClone->insData[0].lock() return nullptr"; + } + DataPtr inputEdge = std::make_shared<Data>(*cur.get()); + + inputEdge->getInputTo().clear(); + inputEdge->name = inpuitName; + inputEdge->creatorLayer = inputLayer; + inputEdge->inputTo[layerToClone->name] = convLayer; + inputEdge->getInputTo().clear(); + inputEdge->inputTo[layerToClone->name] = convLayer; + + inputs.begin()->second->setInputData(inputEdge); + + convLayer->insData.clear(); + convLayer->insData.push_back(inputEdge); + + inputLayer->outData.clear(); + inputLayer->outData.push_back(inputEdge); + + DataPtr convEdge = std::make_shared<Data>(*layerToClone->outData[0].get()); + convEdge->getInputTo().clear(); + convEdge->creatorLayer = convLayer; + convEdge->name = convLayer->name; + convLayer->outData.clear(); + convLayer->outData.push_back(convEdge); + + CNNLayerPtr reluLayer; + n.getLayerByName(layerRelU->name.c_str(), reluLayer, nullptr); + DataPtr reluEdge = std::make_shared<Data>(*layerRelU->outData[0].get()); + reluEdge->getInputTo().clear(); + reluEdge->creatorLayer = reluLayer; + reluEdge->name = reluLayer->name; + reluLayer->insData.clear(); + reluLayer->insData.push_back(convEdge); + reluLayer->outData.clear(); + reluLayer->outData.push_back(reluEdge); + + convEdge->inputTo[reluLayer->name] = reluLayer; + + CNNLayerPtr ssLayer; + std::string ssLayerName = convLayer->name + "_"; + n.getLayerByName(ssLayerName.c_str(), ssLayer, nullptr); + DataPtr ssEdge = std::make_shared<Data>(*layerRelU->outData[0].get()); + ssEdge->getInputTo().clear(); + ssEdge->creatorLayer = ssLayer; + ssEdge->name = ssLayer->name; + ssLayer->insData.clear(); + ssLayer->insData.push_back(reluEdge); + ssLayer->outData.clear(); + ssLayer->outData.push_back(ssEdge); + + reluEdge->inputTo[ssLayer->name] = ssLayer; + + n.addOutput(ssLayer->name); + + // filling 
weights and biases + size_t channels = ssEdge->getTensorDesc().getDims()[1]; + Blob::Ptr weights = nullptr; + SizeVector wdims; + wdims.push_back(channels); + weights = make_shared_blob<float, const SizeVector>(Precision::FP32, Layout::C, wdims); + weights->allocate(); + float *dataw = weights->buffer().as<float *>(); + for (size_t i = 0; i < channels; i++) { + dataw[i] = 1.0f; + } + ssLayer->blobs["weights"] = weights; + + Blob::Ptr biases = nullptr; + SizeVector bdims; + bdims.push_back(channels); + biases = make_shared_blob<float, const SizeVector>(Precision::FP32, Layout::C, bdims); + biases->allocate(); + float *datab = biases->buffer().as<float *>(); + for (size_t i = 0; i < channels; i++) { + datab[i] = 0.0f; + } + ssLayer->blobs["biases"] = biases; + + auto wss = dynamic_cast<WeightableLayer*>(ssLayer.get()); + wss->_weights = weights; + wss->_biases = biases; + + return reader1.getNetwork(); +} + +void Int8Calibrator::collectByLayerStatistic(const InferenceEngine::NetworkStatsMap &stat) { + _collectByLayer = true; + _collectStatistic = false; + networkReaderC = InferenceEngine::CNNNetReader(); + networkReaderC.ReadNetwork(_modelFileNameI8C); + if (!networkReaderC.isParseSuccess()) THROW_IE_EXCEPTION << "cannot load a failed Model"; + if (_cBatch != 0) { + networkReaderC.getNetwork().setBatchSize(_cBatch); + } + + /** Extract model name and load weights **/ + std::string binFileName = fileNameNoExt(_modelFileNameI8C) + ".bin"; + networkReaderC.ReadWeights(binFileName.c_str()); + + auto network = networkReaderC.getNetwork(); + // 1. add all layers as output one + for (auto &&layer : network) { + std::string layerType = network.getLayerByName(layer->name.c_str())->type; + if (/*layerType != "Split" &&*/layerType != "Input") { + network.addOutput(layer->name); + } + + if (layerType == "Convolution") { + _layersAccuracyDrop[layer->name] = 0.f; + } + } + + ExecutableNetwork executable_network = _pluginI8C.LoadNetwork(network, { { CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS), CONFIG_VALUE(YES) } }); + _inferRequestI8C = executable_network.CreateInferRequest(); + + // 2. 
go over all layers which affect accuracy and create network basing on it + for (auto l : _layersAccuracyDrop) { + CNNLayerPtr layerToClone = network.getLayerByName(l.first.c_str()); + CNNLayerPtr layerRelU = nullptr; + // verification if there is Conv-RELU patern + // currently it is only supported + + // if only one output from conv and if it is an output to relu + bool quattization = false; + if (layerToClone->outData.size() == 1 && layerToClone->outData[0]->inputTo.size() == 1) { + layerRelU = layerToClone->outData[0]->inputTo.begin()->second; + if (layerRelU->type == "ReLU") { + quattization = true; + } + } + + if (quattization) { + CNNNetwork n = createICNNNetworkForLayer(layerToClone); + if (_cBatch != 0) { + n.setBatchSize(_cBatch); + } + + // Initialize statistic + ICNNNetworkStats *pstats = nullptr; + ICNNNetwork &in = n; + StatusCode s = in.getStats(&pstats, nullptr); + if (s == StatusCode::OK && pstats) { + pstats->setNodesStats(stat); + } + + InferenceEngine::InputsDataMap inputs = n.getInputsInfo(); + DataPtr q = inputs.begin()->second->getInputData(); + + ExecutableNetwork enetwork = _pluginI8C.LoadNetwork(n, { { CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS), CONFIG_VALUE(YES) } }); + _singleLayerNetworks.push_back(enetwork); + InferenceEngine::InferRequest request = enetwork.CreateInferRequest(); + std::string inpuitName = layerToClone->insData[0].lock()->name; + request.SetBlob(inpuitName, _inferRequestI8C.GetBlob(inpuitName)); + _singleLayerRequests[layerToClone->name] = { request, layerRelU->name, layerToClone->name }; + } + } +} + + +void Int8Calibrator::collectCalibrationStatistic() { + if (_collectByLayer) { + std::map<std::string, SingleLayerData>::iterator it = _singleLayerRequests.begin(); + while (it != _singleLayerRequests.end()) { + it->second._request.Infer(); + Blob::Ptr expected = _inferRequestI8C.GetBlob(it->second._outputName); + std::string i8Out = it->second._outputI8Name + "_"; + Blob::Ptr result = it->second._request.GetBlob(i8Out.c_str()); + float diff = compare_NRMSD(result, expected); + it->second._int8Accuracy.push_back(diff); + it++; + } + } + if (_collectStatistic) { + for (auto l : _statData.registeredLayers()) { + auto outBlob = _inferRequestI8C.GetBlob(l); + + std::string outName = l; + if (_inputsFromLayers.find(l) != _inputsFromLayers.end()) { + outName = _inputsFromLayers[l]; + } + + size_t N, C, statCount; + if (outBlob->dims().size() == 4 && outBlob->layout() == Layout::NCHW) { + N = outBlob->dims()[3]; + C = outBlob->dims()[2]; + statCount = C; + } else if (outBlob->dims().size() == 2 && outBlob->layout() == Layout::NC) { + N = outBlob->dims()[1]; + C = outBlob->dims()[0]; + statCount = 1; + } else { + continue; + } + + // Counting min/max outputs per channel + for (size_t n = 0; n < N; n++) { + if (outBlob->dims().size() == 4) { + size_t _HW = outBlob->dims()[0] * outBlob->dims()[1]; + for (size_t c = 0; c < C; c++) { + if (outBlob->getTensorDesc().getPrecision() == Precision::FP32) { + float *ptr = &outBlob->buffer().as<float *>()[(n * C + c) * _HW]; + _statData.addTensorStatistics(outName, c, ptr, _HW); + } else if (outBlob->getTensorDesc().getPrecision() == Precision::U8) { + uint8_t *ptr = &outBlob->buffer().as<uint8_t *>()[(n * C + c) * _HW]; + _statData.addTensorStatistics(outName, c, ptr, _HW); + } else { + throw std::logic_error(std::string("Unsupported precision: ") + outBlob->getTensorDesc().getPrecision().name()); + } + } + } else if (outBlob->dims().size() == 2) { + if (outBlob->getTensorDesc().getPrecision() == Precision::FP32) { + 
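+                        // NC layout: treat the row of C values for image n as a single channel (channel 0)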
float *ptr = &outBlob->buffer().as<float *>()[n * C]; + _statData.addTensorStatistics(outName, 0, ptr, C); + } else if (outBlob->getTensorDesc().getPrecision() == Precision::U8) { + uint8_t *ptr = &outBlob->buffer().as<uint8_t *>()[n * C]; + _statData.addTensorStatistics(outName, 0, ptr, C); + } else { + throw std::logic_error(std::string("Unsupported precision: ") + outBlob->getTensorDesc().getPrecision().name()); + } + } + } + } + } +} + +void Int8Calibrator::calculateLayersAccuracyDrop() { + _layersAccuracyDrop.clear(); + + std::map<std::string, SingleLayerData>::iterator it = _singleLayerRequests.begin(); + while (it != _singleLayerRequests.end()) { + // calculate average metric per layer over all images and sort in desc order + float mo = 0.f; + for (auto d : it->second._int8Accuracy) { + mo += d; + } + mo = mo / it->second._int8Accuracy.size(); + _layersAccuracyDrop[it->first] = mo; + it++; + } + + // correction of accuracy drop to have sorted values for cases when accuracy drop is equal + // correction is added according to topological order + // this will prioritize returning of layers to FP32 starting from layers closer to the end of network + std::vector<CNNLayerPtr> ordered = InferenceEngine::details::CNNNetSortTopologically(networkReaderC.getNetwork()); + float c = 0.00001f; + for (auto l : ordered) { + auto it = _layersAccuracyDrop.find(l->name); + if (it != _layersAccuracyDrop.end()) { + it->second += c; + } + c += 0.00001f; + } + _singleLayerRequests.clear(); +} + +std::map<std::string, float> Int8Calibrator::layersAccuracyDrop() { + return _layersAccuracyDrop; +} + + + +//-------------------------------------------------------------------------------------------------- + +ClassificationCalibrator::ClassificationCalibrator(int nPictures, const std::string &flags_m, + const std::string &flags_d, const std::string &flags_i, + int flags_b, InferenceEngine::InferencePlugin plugin, + CsvDumper &dumper, const std::string &flags_l, + PreprocessingOptions preprocessingOptions, bool zeroBackground) : + ClassificationProcessor(flags_m, flags_d, flags_i, flags_b, + plugin, dumper, flags_l, + preprocessingOptions, zeroBackground) { + _modelFileNameI8C = modelFileName; + _pluginI8C = plugin; + _nPictures = nPictures; + _cBatch = flags_b; +} + +shared_ptr<Processor::InferenceMetrics> ClassificationCalibrator::Process() { + inferRequest = _inferRequestI8C; + int top1Result = 0, total = 0; + + ClassificationSetGenerator generator; + + auto validationMap = generator.getValidationMap(imagesPath); + ImageDecoder decoder; + + // ----------------------------Do inference------------------------------------------------------------- + std::vector<int> expected(batch); + std::vector<std::string> files(batch); + int captured = 0; + + if (!_nPictures) { + _nPictures = validationMap.size(); + } + + + ConsoleProgress progress(_nPictures); + + CalibrationMetrics im; + + std::string firstInputName = this->inputInfo.begin()->first; + std::string firstOutputName = this->outInfo.begin()->first; + auto firstInputBlob = inferRequest.GetBlob(firstInputName); + auto firstOutputBlob = inferRequest.GetBlob(firstOutputName); + + size_t ipics = 0; + auto iter = validationMap.begin(); + while (iter != validationMap.end() && ipics < _nPictures) { + int b = 0; + int filesWatched = 0; + for (; b < batch && iter != validationMap.end() && ipics + b < _nPictures ; b++, iter++, filesWatched++) { + expected[b] = iter->first; + try { + decoder.insertIntoBlob(iter->second, b, *firstInputBlob, preprocessingOptions); + files[b] 
= iter->second; + } catch (const InferenceEngineException &iex) { + slog::warn << "Can't read file " << iter->second << slog::endl; + // Could be some non-image file in directory + b--; + continue; + } + } + ipics += batch; + + Infer(progress, filesWatched, im); + collectCalibrationStatistic(); + + std::vector<unsigned> results; + auto firstOutputData = firstOutputBlob->buffer().as<PrecisionTrait<Precision::FP32>::value_type *>(); + InferenceEngine::TopResults(1, *firstOutputBlob, results); + + for (int i = 0; i < b; i++) { + int expc = expected[i]; + if (zeroBackground) expc++; + bool top1Scored = (results[i] == expc); + if (top1Scored) top1Result++; + total++; + } + } + progress.finish(); + + calculateLayersAccuracyDrop(); + + im.AccuracyResult = static_cast<float>(top1Result) / static_cast<float>(total); + + return std::shared_ptr<Processor::InferenceMetrics>(new CalibrationMetrics(im)); +} + +//-------------------------------------------------------------------------------------------------- +SSDObjectDetectionCalibrator::SSDObjectDetectionCalibrator(int nPictures, const std::string &flags_m, + const std::string &flags_d, const std::string &flags_i, + const std::string &subdir, int flags_b, + double threshold, + InferencePlugin plugin, CsvDumper &dumper, + const std::string &flags_a, const std::string &classes_list_file) : + SSDObjectDetectionProcessor(flags_m, flags_d, flags_i, subdir, flags_b, + threshold, + plugin, dumper, + flags_a, classes_list_file) { + _modelFileNameI8C = modelFileName; + _pluginI8C = plugin; + _nPictures = nPictures; +} + +shared_ptr<Processor::InferenceMetrics> SSDObjectDetectionCalibrator::Process() { + inferRequest = _inferRequestI8C; + + // Parsing PASCAL VOC2012 format + VOCAnnotationParser vocAnnParser; + VOCAnnotationCollector annCollector(annotationsPath); + + if (annCollector.annotations().size() == 0) { + ObjectDetectionInferenceMetrics emptyIM(this->threshold); + + return std::shared_ptr<InferenceMetrics>(new ObjectDetectionInferenceMetrics(emptyIM)); + } + + // Getting desired results from annotations + std::map<std::string, ImageDescription> desiredForFiles; + + for (auto &ann : annCollector.annotations()) { + std::list<DetectedObject> dobList; + for (auto &obj : ann.objects) { + DetectedObject dob(classes[obj.name], obj.bndbox.xmin, obj.bndbox.ymin, obj.bndbox.xmax, obj.bndbox.ymax, 1.0, obj.difficult != 0); + dobList.push_back(dob); + } + ImageDescription id(dobList); + desiredForFiles.insert(std::pair<std::string, ImageDescription>(ann.folder + "/" + (!subdir.empty() ? 
subdir + "/" : "") + ann.filename, id)); + } + + + ImageDecoder decoder; + + const int maxProposalCount = outputDims[1]; + const int objectSize = outputDims[0]; + + for (auto &item : outInfo) { + DataPtr outputData = item.second; + if (!outputData) { + throw std::logic_error("output data pointer is not valid"); + } + } + // ----------------------------------------------------------------------------------------------------- + + // ----------------------------Do inference------------------------------------------------------------- + + std::vector<VOCAnnotation> expected(batch); + + if (!_nPictures) { + _nPictures = annCollector.annotations().size(); + } + + ConsoleProgress progress(_nPictures); + + ObjectDetectionInferenceMetrics im(threshold); + + vector<VOCAnnotation>::const_iterator iter = annCollector.annotations().begin(); + + std::map<std::string, ImageDescription> scaledDesiredForFiles; + + std::string firstInputName = this->inputInfo.begin()->first; + auto firstInputBlob = inferRequest.GetBlob(firstInputName); + size_t ipics = 0; + + while (iter != annCollector.annotations().end() && ipics < _nPictures) { + std::vector<std::string> files; + int b = 0; + + int filesWatched = 0; + for (; b < batch && iter != annCollector.annotations().end(); b++, iter++, filesWatched++) { + expected[b] = *iter; + string filename = iter->folder + "/" + (!subdir.empty() ? subdir + "/" : "") + iter->filename; + try { + Size orig_size = decoder.insertIntoBlob(std::string(imagesPath) + "/" + filename, b, *firstInputBlob, preprocessingOptions); + float scale_x, scale_y; + + scale_x = 1.0 / iter->size.width; // orig_size.width; + scale_y = 1.0 / iter->size.height; // orig_size.height; + + if (scaleProposalToInputSize) { + scale_x *= firstInputBlob->dims()[0]; + scale_y *= firstInputBlob->dims()[1]; + } + + // Scaling the desired result (taken from the annotation) to the network size + scaledDesiredForFiles.insert(std::pair<std::string, ImageDescription>(filename, desiredForFiles.at(filename).scale(scale_x, scale_y))); + + files.push_back(filename); + } catch (const InferenceEngineException &iex) { + slog::warn << "Can't read file " << this->imagesPath + "/" + filename << slog::endl; + // Could be some non-image file in directory + b--; + continue; + } + ipics++; + } + + if (files.size() == batch) { + InferenceEngine::StatusCode sts; + InferenceEngine::ResponseDesc dsc; + + // Infer model + Infer(progress, filesWatched, im); + collectCalibrationStatistic(); + + // Processing the inference result + std::map<std::string, std::list<DetectedObject>> detectedObjects = processResult(files); + + // Calculating similarity + // + for (int b = 0; b < files.size(); b++) { + ImageDescription result(detectedObjects[files[b]]); + im.apc.consumeImage(result, scaledDesiredForFiles.at(files[b])); + } + } + } + progress.finish(); + + calculateLayersAccuracyDrop(); + + CalibrationMetrics imCalibration; + const ObjectDetectionInferenceMetrics &odim = dynamic_cast<const ObjectDetectionInferenceMetrics&>(im); + if (im.nRuns > 0) { + std::map<int, double> appc = odim.apc.calculateAveragePrecisionPerClass(); + + double mAP = 0; + for (auto i : appc) { + mAP += i.second; + } + imCalibration.AccuracyResult = mAP / appc.size(); + } + return std::shared_ptr<Processor::InferenceMetrics>(new CalibrationMetrics(imCalibration)); +} + + diff --git a/inference-engine/samples/calibration_tool/calibrator_processors.h b/inference-engine/samples/calibration_tool/calibrator_processors.h new file mode 100644 index 000000000..f533e33ad --- 
/dev/null +++ b/inference-engine/samples/calibration_tool/calibrator_processors.h @@ -0,0 +1,178 @@ +// Copyright (C) 2018 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include <vector> +#include <string> +#include "inference_engine.hpp" +#include "ClassificationProcessor.hpp" +#include "SSDObjectDetectionProcessor.hpp" +#include "data_stats.h" +#include <map> +#include <memory> + +/** + * Calibrator class representing unified stages for calibration of any kind of networks +*/ +class Int8Calibrator { +public: + /** + * Intermediate structure storing of data for measurements of by-layer statistic of accuracy drop + */ + struct SingleLayerData { + InferenceEngine::InferRequest _request; + std::string _outputName; + std::string _outputI8Name; + std::vector<float> _int8Accuracy; + }; + + /** + * Initializes state to collect accuracy of FP32 network and collect statistic + * of activations. The statistic of activations is stored in _statData and has all max/min for all + * layers and for all pictures + * The inference of all pictures and real collect of the statistic happen during call of + * Processor::Process() + */ + void collectFP32Statistic(); + + /** + * Initializes a state to collect intermediate numeric accuracy drop happening during quantization of + * certain layer to int8. The numeric accuracy drop is measured using NRMSD metric. + * + * For this purpose it creates dedicated network for certain layer, initializes this + * network by statistic that cause execute dedicated network in int8 mode. + * + * In addition to original network we create full original network executed in FP32 mode, and + * register all layers as output ones. + * Information from these layers is used as + * a) input to dedicated layer networks + * b) comparison for NRMSD algorithm between I8 and FP32 calc + * + * The inference of all pictures and real collect of the drop happen during call of + * Processor::Process() + * @param stat + */ + void collectByLayerStatistic(const InferenceEngine::NetworkStatsMap &stat); + + /** + * Initialize state to collect accuracy drop in int8 mode to be compared later vs FP32 accuracy + * metric. + * + * The inference of all pictures and real collect of the accuracy happen during call of + * Processor::Process() + * + * @param stat - The statistic for normalization + * @param layersToInt8 - list of layers planned to be executed in int8. if layer is absent in this + * map, it is assumed that it will be executed in int8 + */ + void validateInt8Config(const InferenceEngine::NetworkStatsMap &stat, + const std::map<std::string, bool>& layersToInt8); + + /** + * Statistic collected in the collectFP32Statistic is processed with threshold passed as a parameter + * for this method. 
All values for each layer and for all pictures are sorted, and the min/max
+     * values which exceed the threshold are thrown away
+     * @param threshold - parameter for throwing away outliers in the activation statistics
+     * @return InferenceEngine::NetworkStatsMap - mapping of layer name to NetworkNodeStatsPtr
+     */
+    InferenceEngine::NetworkStatsMap getStatistic(float threshold);
+
+    /**
+     * Returns the by-layer accuracy drop container
+     */
+    std::map<std::string, float> layersAccuracyDrop();
+
+protected:
+    /**
+     * This function should be called from the final calibrator after each Infer for each picture.
+     * It calculates the by-layer accuracy drop, and it also collects activation value statistics
+     */
+    void collectCalibrationStatistic();
+
+    /**
+     * This function should be called from the calibration class after Infer of all pictures.
+     * It calculates the average NRMSD-based accuracy drop for each layer and fills _layersAccuracyDrop
+     */
+    void calculateLayersAccuracyDrop();
+
+    bool _collectByLayer = false;
+    bool _collectStatistic = true;
+    InferencePlugin _pluginI8C;
+    std::string _modelFileNameI8C;
+    InferenceEngine::CNNNetReader networkReaderC;
+    InferenceEngine::InferRequest _inferRequestI8C;
+    int _cBatch = 0;
+
+    int _nPictures;
+
+private:
+    /**
+     * Helper function for collecting statistics for input layers: a ScaleShift layer with
+     * scale == 1 and shift == 0 is added just after the input
+     */
+    CNNLayerPtr addScaleShiftBeforeLayer(std::string name, InferenceEngine::CNNLayer::Ptr beforeLayer,
+                                         size_t port, std::vector<float> scale);
+
+    /**
+     * Returns the Normalized Root-Mean-Square Deviation (NRMSD) metric for the two blobs passed to the function
+     */
+    float compare_NRMSD(InferenceEngine::Blob::Ptr res, InferenceEngine::Blob::Ptr ref);
+
+    /**
+     * Creates a dedicated int8 network around the selected layer. Currently, besides the layer itself,
+     * this network has to have ReLU and ScaleShift layers.
+     * Since the Inference Engine API is mostly directed at loading a network from IR, we need to create
+     * such an IR first, read it through a stream, and modify the network to match the required parameters
+     */
+    InferenceEngine::CNNNetwork createICNNNetworkForLayer(InferenceEngine::CNNLayer::Ptr layerToClone);
+
+    std::map<std::string, float> _layersAccuracyDrop;
+    std::vector<InferenceEngine::ExecutableNetwork> _singleLayerNetworks;
+    std::map<std::string, SingleLayerData> _singleLayerRequests;
+    std::map<std::string, std::string> _inputsFromLayers;
+    AggregatedDataStats _statData;
+};
+
+/**
+ * This struct represents the single generalized metric which will be used for comparison of
+ * the accuracy drop
+ */
+struct CalibrationMetrics : public ClassificationProcessor::InferenceMetrics {
+public:
+    float AccuracyResult = 0;
+};
+
+/**
+ * Calibration class for classification networks.
+ * Responsible for proper post-processing of results and calculation of the Top-1 metric, which is used as
+ * the universal accuracy metric and participates in the verification of the accuracy drop
+ */
+class ClassificationCalibrator : public ClassificationProcessor, public Int8Calibrator {
+public:
+    ClassificationCalibrator(int nPictures, const std::string &flags_m, const std::string &flags_d,
+                             const std::string &flags_i, int flags_b,
+                             InferenceEngine::InferencePlugin plugin, CsvDumper &dumper, const std::string &flags_l,
+                             PreprocessingOptions preprocessingOptions, bool zeroBackground);
+
+    shared_ptr<InferenceMetrics> Process() override;
+};
+
+
+/**
+* Calibration class for SSD object detection networks.
+* Responsible for proper post processing of results and calculate of mAP metric which is used as +* universal metric for accuracy and participated in verification of accuracy drop +*/ +class SSDObjectDetectionCalibrator : public SSDObjectDetectionProcessor, public Int8Calibrator { +public: + SSDObjectDetectionCalibrator(int nPictures, const std::string &flags_m, const std::string &flags_d, + const std::string &flags_i, const std::string &subdir, int flags_b, + double threshold, + InferencePlugin plugin, CsvDumper &dumper, + const std::string &flags_a, const std::string &classes_list_file); + + shared_ptr<InferenceMetrics> Process()override; +}; diff --git a/inference-engine/samples/calibration_tool/data_stats.cpp b/inference-engine/samples/calibration_tool/data_stats.cpp new file mode 100644 index 000000000..313c3f00b --- /dev/null +++ b/inference-engine/samples/calibration_tool/data_stats.cpp @@ -0,0 +1,105 @@ +// Copyright (C) 2018 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +#include <stdlib.h> +#include <cfloat> +#include <cmath> +#include <stdint.h> +#include <iostream> +#include <limits> +#include <vector> +#include <algorithm> +#include <string> + +#include "data_stats.h" + + +TensorStatistic::TensorStatistic(float* data, size_t count, size_t nbuckets) { + _min = std::numeric_limits<float>::max(); + _max = std::numeric_limits<float>::min(); + for (size_t i = 0; i < count; i++) { + float val = static_cast<float>(data[i]); + if (_min > val) { + _min = val; + } + + if (_max < val) { + _max = val; + } + } + + if (_min == _max) { + return; + } +} + +float TensorStatistic::getMaxValue() const { + return _max; +} + + +float TensorStatistic::getMinValue() const { + return _min; +} + +std::vector<std::string> AggregatedDataStats::registeredLayers() { + std::vector<std::string> layers; + for (auto l : _data) { + layers.push_back(l.first); + } + return layers; +} + +void AggregatedDataStats::registerLayer(std::string layer) { + _data[layer]; +} + +void AggregatedDataStats::addTensorStatistics(const std::string& name, size_t channel, float* data, size_t count) { + auto&& byChannel = _data[name]; + byChannel[channel].push_back(TensorStatistic(data, count)); +} + +void AggregatedDataStats::addTensorStatistics(const std::string &name, size_t channel, uint8_t *data, size_t count) { + std::vector<float> intermediate; + for (size_t i = 0; i < count; i++) { + intermediate.push_back(data[i]); + } + addTensorStatistics(name, channel, intermediate.data(), count); +} + +size_t AggregatedDataStats::getNumberChannels(const std::string& name) const { + auto it = _data.find(name); + if (it != _data.end()) { + return it->second.size(); + } + return 0; +} + +void AggregatedDataStats::getDataMinMax(const std::string& name, size_t channel, float& min, float& max, float threshold) { + // take data by name + auto it = _data.find(name); + if (it != _data.end()) { + auto stats = it->second[channel]; + // having absolute min/max values, we can create new statistic + std::vector<float> maxValues; + std::vector<float> minValues; + for (size_t i = 0; i < stats.size(); i++) { + const TensorStatistic& tsS = stats[i]; + maxValues.push_back(tsS.getMaxValue()); + minValues.push_back(tsS.getMinValue()); + } + // define number of elements to throw out + size_t elementToTake = maxValues.size() * threshold / 100; + int elementsToThrow = maxValues.size() - elementToTake; + std::sort(maxValues.begin(), maxValues.end()); + std::sort(minValues.begin(), minValues.end()); + + min = minValues[elementsToThrow]; + 
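+        // maxValues is sorted ascending, so this picks the value at the "threshold"
+        // percentile and discards the largest per-image maxima as outliers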
max = maxValues[elementToTake - 1]; + } else { + min = max = 0.f; + } +} + diff --git a/inference-engine/samples/calibration_tool/data_stats.h b/inference-engine/samples/calibration_tool/data_stats.h new file mode 100644 index 000000000..c844b3db3 --- /dev/null +++ b/inference-engine/samples/calibration_tool/data_stats.h @@ -0,0 +1,32 @@ +// Copyright (C) 2018 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include <vector> +#include <map> +#include <string> + +struct TensorStatistic { + TensorStatistic(float* data, size_t count, size_t nbuckets = 1000); + float getMaxValue() const; + float getMinValue()const; +protected: + float _min; + float _max; +}; + +class AggregatedDataStats { +public: + void addTensorStatistics(const std::string& name, size_t channel, float* data, size_t count); + void addTensorStatistics(const std::string &name, size_t channel, uint8_t *data, size_t count); + void getDataMinMax(const std::string& name, size_t channel, float& min, float& max, float threshold); + size_t getNumberChannels(const std::string& name) const; + std::vector <std::string> registeredLayers(); + void registerLayer(std::string layer); +protected: + std::map<std::string, std::map<size_t, std::vector<TensorStatistic> > > _data; +}; + diff --git a/inference-engine/samples/calibration_tool/main.cpp b/inference-engine/samples/calibration_tool/main.cpp new file mode 100644 index 000000000..fd95a29ea --- /dev/null +++ b/inference-engine/samples/calibration_tool/main.cpp @@ -0,0 +1,521 @@ +// Copyright (C) 2018 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +/** + * @brief The entry point for Inference Engine validation application + * @file validation_app/main.cpp + */ +#include <gflags/gflags.h> +#include <algorithm> +#include <functional> +#include <iostream> +#include <map> +#include <fstream> +#include <random> +#include <string> +#include <tuple> +#include <vector> +#include <limits> +#include <iomanip> +#include <memory> + +#include <ext_list.hpp> + +#include <samples/common.hpp> +#include <samples/slog.hpp> + +#include "user_exception.hpp" +#include "calibrator_processors.h" +#include "SSDObjectDetectionProcessor.hpp" +#include "YOLOObjectDetectionProcessor.hpp" +#include "network_serializer.h" +#include "ie_icnn_network_stats.hpp" +#include "details/caseless.hpp" + +using namespace std; +using namespace InferenceEngine; +using namespace InferenceEngine::details; + +using InferenceEngine::details::InferenceEngineException; + +#define DEFAULT_PATH_P "./lib" + +/// @brief Message for help argument +static const char help_message[] = "Print a help message"; +/// @brief Message for images argument +static const char image_message[] = "Required. Path to a directory with validation images. For Classification models, the directory must contain" + " folders named as labels with images inside or a .txt file with" + " a list of images. For Object Detection models, the dataset must be in" + " VOC format."; +/// @brief Message for plugin_path argument +static const char plugin_path_message[] = "Path to a plugin folder"; +/// @brief message for model argument +static const char model_message[] = "Required. Path to an .xml file with a trained model, including model name and " + "extension."; +/// @brief Message for plugin argument +static const char plugin_message[] = "Plugin name. For example, CPU. 
If this parameter is passed, " + "the sample looks for a specified plugin only."; +/// @brief Message for assigning cnn calculation to device +static const char target_device_message[] = "Target device to infer on: CPU (default), GPU, FPGA, or MYRIAD." + " The application looks for a suitable plugin for the specified device."; +/// @brief Message for label argument +static const char label_message[] = "Path to a file with labels for a model"; +/// @brief M`essage for batch argumenttype +static const char batch_message[] = "Batch size value. If not specified, the batch size value is taken from IR"; +/// @brief Message for dump argument +static const char dump_message[] = "Dump file names and inference results to a .csv file"; +/// @brief Message for network type +static const char type_message[] = "Type of an inferred network (\"C\" by default)"; +/// @brief Message for pp-type +static const char preprocessing_type[] = "Preprocessing type. Options: \"None\", \"Resize\", \"ResizeCrop\""; +/// @brief Message for pp-crop-size +static const char preprocessing_size[] = "Preprocessing size (used with ppType=\"ResizeCrop\")"; +static const char preprocessing_width[] = "Preprocessing width (overrides -ppSize, used with ppType=\"ResizeCrop\")"; +static const char preprocessing_height[] = "Preprocessing height (overrides -ppSize, used with ppType=\"ResizeCrop\")"; + +static const char obj_detection_annotations_message[] = "Required for Object Detection models. Path to a directory" + " containing an .xml file with annotations for images."; + +static const char obj_detection_classes_message[] = "Required for Object Detection models. Path to a file with" + " a list of classes"; + +static const char obj_detection_subdir_message[] = "Directory between the path to images (specified with -i) and image name (specified in the" + " .xml file). For VOC2007 dataset, use JPEGImages."; + +static const char obj_detection_kind_message[] = "Type of an Object Detection model. Options: SSD"; + +/// @brief Message for GPU custom kernels desc +static const char custom_cldnn_message[] = "Required for GPU custom kernels. " + "Absolute path to an .xml file with the kernel descriptions."; + +/// @brief Message for user library argument +static const char custom_cpu_library_message[] = "Required for CPU custom layers. " + "Absolute path to a shared library with the kernel implementations."; + +static const char zero_background_message[] = "\"Zero is a background\" flag. Some networks are trained with a modified" + " dataset where the class IDs " + " are enumerated from 1, but 0 is an undefined \"background\" class" + " (which is never detected)"; + +/// @brief Network type options and their descriptions +static const char* types_descriptions[][2] = { + { "C", "calibrate Classification network and write the calibrated network to IR" }, +// { "SS", "semantic segmentation" }, // Not supported yet + { "OD", "calibrate Object Detection network and write the calibrated network to IR" }, + { "RawC", "collect only statistics for Classification network and write statistics to IR. With this option, a model is not calibrated. For calibration " + "and statisctics collection, use \"-t C\" instead." }, + { "RawOD", "collect only statistics for Object Detection network and write statistics to IR. With this option, a model is not calibrated. 
For calibration " + "and statisctics collection, use \"-t OD\" instead" }, + { nullptr, nullptr } +}; + +static const char accuracy_threshold_message[] = "Threshold for a maximum accuracy drop of quantized model." + " Must be an integer number (percents)" + " without a percent sign. Default value is 1, which stands for accepted" + " accuracy drop in 1%"; +static const char number_of_pictures_message[] = "Number of pictures from the whole validation set to" + "create the calibration dataset. Default value is 0, which stands for" + "the whole provided dataset"; +static const char output_model_name[] = "Output name for calibrated model. Default is <original_model_name>_i8.xml|bin"; + +/// @brief Define flag for showing help message <br> +DEFINE_bool(h, false, help_message); +/// @brief Define parameter for a path to images <br> +/// It is a required parameter +DEFINE_string(i, "", image_message); +/// @brief Define parameter for a path to model file <br> +/// It is a required parameter +DEFINE_string(m, "", model_message); +/// @brief Define parameter for a plugin name <br> +/// It is a required parameter +DEFINE_string(p, "", plugin_message); +/// @brief Define parameter for a path to a file with labels <br> +/// Default is empty +DEFINE_string(OCl, "", label_message); +/// @brief Define parameter for a path to plugins <br> +/// Default is ./lib +DEFINE_string(pp, DEFAULT_PATH_P, plugin_path_message); +/// @brief Define paraneter for a target device to infer on <br> +DEFINE_string(d, "CPU", target_device_message); +/// @brief Define parameter for batch size <br> +/// Default is 0 (which means that batch size is not specified) +DEFINE_int32(b, 0, batch_message); +/// @brief Define flag to dump results to a file <br> +DEFINE_bool(dump, false, dump_message); +/// @brief Define parameter for a network type +DEFINE_string(t, "C", type_message); + +/// @brief Define parameter for preprocessing type +DEFINE_string(ppType, "", preprocessing_type); + +/// @brief Define parameter for preprocessing size +DEFINE_int32(ppSize, 0, preprocessing_size); +DEFINE_int32(ppWidth, 0, preprocessing_width); +DEFINE_int32(ppHeight, 0, preprocessing_height); + +DEFINE_bool(Czb, false, zero_background_message); + +DEFINE_string(ODa, "", obj_detection_annotations_message); + +DEFINE_string(ODc, "", obj_detection_classes_message); + +DEFINE_string(ODsubdir, "", obj_detection_subdir_message); + +/// @brief Define parameter for a type of Object Detection network +DEFINE_string(ODkind, "SSD", obj_detection_kind_message); + +/// @brief Define parameter for GPU kernels path <br> +/// Default is ./lib +DEFINE_string(c, "", custom_cldnn_message); + +/// @brief Define parameter for a path to CPU library with user layers <br> +/// It is an optional parameter +DEFINE_string(l, "", custom_cpu_library_message); + +/// @brief Define parameter for accuracy drop threshold +DEFINE_double(threshold, 1.0f, accuracy_threshold_message); + +DEFINE_int32(subset, 0, number_of_pictures_message); + +DEFINE_string(output, "", output_model_name); + +/** + * @brief This function shows a help message + */ +static void showUsage() { + std::cout << std::endl; + std::cout << "Usage: calibration_tool [OPTION]" << std::endl << std::endl; + std::cout << "Available options:" << std::endl; + std::cout << std::endl; + std::cout << " -h " << help_message << std::endl; + std::cout << " -t <type> " << type_message << std::endl; + for (int i = 0; types_descriptions[i][0] != nullptr; i++) { + std::cout << " -t \"" << types_descriptions[i][0] << "\" to " << 
+    }
+    std::cout << "    -i <path>                 " << image_message << std::endl;
+    std::cout << "    -m <path>                 " << model_message << std::endl;
+    std::cout << "    -l <absolute_path>        " << custom_cpu_library_message << std::endl;
+    std::cout << "    -c <absolute_path>        " << custom_cldnn_message << std::endl;
+    std::cout << "    -d <device>               " << target_device_message << std::endl;
+    std::cout << "    -b N                      " << batch_message << std::endl;
+    std::cout << "    -ppType <type>            " << preprocessing_type << std::endl;
+    std::cout << "    -ppSize N                 " << preprocessing_size << std::endl;
+    std::cout << "    -ppWidth W                " << preprocessing_width << std::endl;
+    std::cout << "    -ppHeight H               " << preprocessing_height << std::endl;
+    std::cout << "    --dump                    " << dump_message << std::endl;
+    std::cout << "    -subset                   " << number_of_pictures_message << std::endl;
+    std::cout << "    -output <output_IR>       " << output_model_name << std::endl;
+    std::cout << "    -threshold                " << accuracy_threshold_message << std::endl;
+
+    std::cout << std::endl;
+    std::cout << "    Classification-specific options:" << std::endl;
+    std::cout << "    -Czb true                 " << zero_background_message << std::endl;
+
+    std::cout << std::endl;
+    std::cout << "    Object detection-specific options:" << std::endl;
+    std::cout << "    -ODkind <kind>            " << obj_detection_kind_message << std::endl;
+    std::cout << "    -ODa <path>               " << obj_detection_annotations_message << std::endl;
+    std::cout << "    -ODc <file>               " << obj_detection_classes_message << std::endl;
+    std::cout << "    -ODsubdir <name>          " << obj_detection_subdir_message << std::endl << std::endl;
+}
+
+enum NetworkType {
+    Undefined = -1,
+    Classification,
+    ObjDetection,
+    RawC,
+    RawOD
+};
+
+std::string strtolower(const std::string& s) {
+    std::string res = s;
+    std::transform(res.begin(), res.end(), res.begin(), ::tolower);
+    return res;
+}
+
+void SaveCalibratedIR(const std::string &originalName,
+                      const std::string &outModelName,
+                      const std::map<std::string, bool>& layersToInt8,
+                      const InferenceEngine::NetworkStatsMap& statMap) {
+    slog::info << "Layers profile for Int8 quantization\n";
+    CNNNetReader networkReader;
+    networkReader.ReadNetwork(originalName);
+    if (!networkReader.isParseSuccess()) THROW_IE_EXCEPTION << "Cannot load model " << originalName;
+
+    /** Extract model name and load weights **/
+    std::string binFileName = fileNameNoExt(originalName) + ".bin";
+    networkReader.ReadWeights(binFileName.c_str());
+
+    auto network = networkReader.getNetwork();
+    for (auto &&layer : network) {
+        if (CaselessEq<std::string>()(layer->type, "convolution")) {
+            auto it = layersToInt8.find(layer->name);
+            if (it != layersToInt8.end() && !it->second) {
+                layer->params["quantization_level"] = "FP32";
+                std::cout << layer->name << ": " << "FP32" << std::endl;
+            } else {
+                layer->params["quantization_level"] = "I8";
+                std::cout << layer->name << ": " << "I8" << std::endl;
+            }
+        }
+    }
+
+    ICNNNetworkStats* pstats = nullptr;
+    StatusCode s = ((ICNNNetwork&)networkReader.getNetwork()).getStats(&pstats, nullptr);
+    if (s == StatusCode::OK && pstats) {
+        pstats->setNodesStats(statMap);
+    }
+
+    slog::info << "Write calibrated network to " << outModelName << ".(xml|bin) IR file\n";
+    CNNNetworkSerializer serializer;
+    serializer.Serialize(outModelName + ".xml", outModelName + ".bin", networkReader.getNetwork());
+}
+
+/**
+ * @brief The main function of inference engine sample application
+ * @param argc - The number of arguments
+ * @param argv - Arguments
+ * @return 0 if all good
+ */
+int main(int argc, char *argv[]) {
+    try {
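+        // Overall flow of this tool, as implemented below: parse and validate the
+        // command-line flags; load the device plugin and optional extensions; run
+        // the network in FP32 to collect a baseline accuracy and activation
+        // statistics; search for an Int8 configuration whose accuracy drop fits
+        // -threshold; serialize the calibrated IR.
+        // A hypothetical invocation (paths are placeholders, not from this patch):
+        //   ./calibration_tool -t C -m model.xml -i <images_dir> -subset 300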
slog::info << "InferenceEngine: " << GetInferenceEngineVersion() << slog::endl; + + // ---------------------------Parsing and validating input arguments-------------------------------------- + slog::info << "Parsing input parameters" << slog::endl; + + bool noOptions = argc == 1; + + gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); + if (FLAGS_h || noOptions) { + showUsage(); + return 1; + } + + UserExceptions ee; + + NetworkType netType = Undefined; + // Checking the network type + if (std::string(FLAGS_t) == "C") { + netType = Classification; + } else if (std::string(FLAGS_t) == "OD") { + netType = ObjDetection; + } else if (std::string(FLAGS_t) == "RawC") { + netType = RawC; + } else if (std::string(FLAGS_t) == "RawOD") { + netType = RawOD; + } else { + ee << UserException(5, "Unknown network type specified (invalid -t option)"); + } + + // Checking required options + if (FLAGS_m.empty()) ee << UserException(3, "Model file is not specified (missing -m option)"); + if (FLAGS_i.empty()) ee << UserException(4, "Images list is not specified (missing -i option)"); + if (FLAGS_d.empty()) ee << UserException(5, "Target device is not specified (missing -d option)"); + if (FLAGS_b < 0) ee << UserException(6, "Batch must be positive (invalid -b option value)"); + + if (netType == ObjDetection) { + // Checking required OD-specific options + if (FLAGS_ODa.empty()) ee << UserException(11, "Annotations folder is not specified for object detection (missing -a option)"); + if (FLAGS_ODc.empty()) ee << UserException(12, "Classes file is not specified (missing -c option)"); + if (FLAGS_b > 0) ee << UserException(13, "Batch option other than 0 is not supported for Object Detection networks"); + } + + if (!ee.empty()) throw ee; + // ----------------------------------------------------------------------------------------------------- + + // ---------------------Loading plugin for Inference Engine------------------------------------------------ + slog::info << "Loading plugin" << slog::endl; + /** Loading the library with extensions if provided**/ + InferencePlugin plugin = PluginDispatcher({ FLAGS_pp, "../../../lib/intel64", "" }).getPluginByDevice(FLAGS_d); + + /** Loading default extensions **/ + if (FLAGS_d.find("CPU") != std::string::npos) { + /** + * cpu_extensions library is compiled from "extension" folder containing + * custom CPU plugin layer implementations. These layers are not supported + * by CPU, but they can be useful for inferring custom topologies. 
+             **/
+            plugin.AddExtension(std::make_shared<Extensions::Cpu::CpuExtensions>());
+        }
+
+        if (!FLAGS_l.empty()) {
+            // CPU extensions are loaded as a shared library and passed as a pointer to the base extension
+            IExtensionPtr extension_ptr = make_so_pointer<IExtension>(FLAGS_l);
+            plugin.AddExtension(extension_ptr);
+            slog::info << "CPU Extension loaded: " << FLAGS_l << slog::endl;
+        }
+        if (!FLAGS_c.empty()) {
+            // GPU extensions are loaded from an .xml description and OpenCL kernel files
+            plugin.SetConfig({{PluginConfigParams::KEY_CONFIG_FILE, FLAGS_c}});
+            slog::info << "GPU Extension loaded: " << FLAGS_c << slog::endl;
+        }
+
+        printPluginVersion(plugin, std::cout);
+
+        CsvDumper dumper(FLAGS_dump);
+
+        std::shared_ptr<Processor> processor;
+
+        PreprocessingOptions preprocessingOptions;
+        if (strtolower(FLAGS_ppType) == "none") {
+            preprocessingOptions = PreprocessingOptions(false, ResizeCropPolicy::DoNothing);
+        } else if (strtolower(FLAGS_ppType) == "resizecrop") {
+            size_t ppWidth = FLAGS_ppSize;
+            size_t ppHeight = FLAGS_ppSize;
+
+            if (FLAGS_ppWidth > 0) ppWidth = FLAGS_ppWidth;
+            if (FLAGS_ppHeight > 0) ppHeight = FLAGS_ppHeight;
+
+            if (FLAGS_ppSize > 0 || (FLAGS_ppWidth > 0 && FLAGS_ppHeight > 0)) {
+                preprocessingOptions = PreprocessingOptions(false, ResizeCropPolicy::ResizeThenCrop, ppWidth, ppHeight);
+            } else {
+                THROW_USER_EXCEPTION(2) << "Size must be specified for preprocessing type " << FLAGS_ppType;
+            }
+        } else if (strtolower(FLAGS_ppType) == "resize" || FLAGS_ppType.empty()) {
+            preprocessingOptions = PreprocessingOptions(false, ResizeCropPolicy::Resize);
+        } else {
+            THROW_USER_EXCEPTION(2) << "Unknown preprocessing type: " << FLAGS_ppType;
+        }
+
+        if (netType == Classification || netType == RawC) {
+            processor = std::shared_ptr<Processor>(
+                new ClassificationCalibrator(FLAGS_subset, FLAGS_m, FLAGS_d, FLAGS_i, FLAGS_b,
+                                             plugin, dumper, FLAGS_l, preprocessingOptions, FLAGS_Czb));
+        } else if (netType == ObjDetection || netType == RawOD) {
+            if (FLAGS_ODkind == "SSD") {
+                processor = std::shared_ptr<Processor>(
+                    new SSDObjectDetectionCalibrator(FLAGS_subset, FLAGS_m, FLAGS_d, FLAGS_i, FLAGS_ODsubdir, FLAGS_b,
+                                                     0.5, plugin, dumper, FLAGS_ODa, FLAGS_ODc));
+/*            } else if (FLAGS_ODkind == "YOLO") {
+                processor = std::shared_ptr<Processor>(
+                    new YOLOObjectDetectionProcessor(FLAGS_m, FLAGS_d, FLAGS_i, FLAGS_ODsubdir, FLAGS_b,
+                                                     0.5, plugin, dumper, FLAGS_ODa, FLAGS_ODc));
+*/
+            }
+        } else {
+            THROW_USER_EXCEPTION(2) << "Unknown network type: " << FLAGS_t;
+        }
+        if (!processor.get()) {
+            THROW_USER_EXCEPTION(2) << "Processor pointer is invalid";
+        }
+
+        Int8Calibrator* calibrator = dynamic_cast<Int8Calibrator*>(processor.get());
+
+        if (netType != RawC && netType != RawOD) {
+            slog::info << "Collecting accuracy metric in FP32 mode to get a baseline, collecting activation statistics" << slog::endl;
+        } else {
+            slog::info << "Collecting activation statistics" << slog::endl;
+        }
+        calibrator->collectFP32Statistic();
+        shared_ptr<Processor::InferenceMetrics> pIMFP32 = processor->Process();
+        const CalibrationMetrics* mFP32 = dynamic_cast<const CalibrationMetrics*>(pIMFP32.get());
+        std::cout << "  FP32 Accuracy: " << OUTPUT_FLOATING(100.0 * mFP32->AccuracyResult) << "% " << std::endl;
+
+        InferenceEngine::NetworkStatsMap statMap;
+        std::map<std::string, bool> layersToInt8;
+        bool bAccuracy = false;
+
+        if (netType != RawC && netType != RawOD) {
+            slog::info << "Verification of network accuracy if all possible layers converted to INT8" << slog::endl;
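+            // The sweep below is a simple grid search (as implemented here, not a
+            // documented guarantee): try activation-statistics thresholds from 100%
+            // down to just above 95% in 0.5% steps and keep the threshold that
+            // yields the best Int8 accuracy.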
+            float bestThreshold = 100.f;
+            float maximalAccuracy = 0.f;
+            for (float threshold = 100.0f; threshold > 95.0f; threshold -= 0.5f) {
+                std::cout << "Validate int8 accuracy, threshold for activation statistics = " << threshold << std::endl;
+                InferenceEngine::NetworkStatsMap tmpStatMap = calibrator->getStatistic(threshold);
+                calibrator->validateInt8Config(tmpStatMap, {});
+                shared_ptr<Processor::InferenceMetrics> pIM_I8 = processor->Process();
+                const CalibrationMetrics *mI8 = dynamic_cast<const CalibrationMetrics *>(pIM_I8.get());
+                if (maximalAccuracy < mI8->AccuracyResult) {
+                    maximalAccuracy = mI8->AccuracyResult;
+                    bestThreshold = threshold;
+                }
+                std::cout << "   Accuracy is " << OUTPUT_FLOATING(100.0 * mI8->AccuracyResult) << "%" << std::endl;
+            }
+
+            statMap = calibrator->getStatistic(bestThreshold);
+
+            if ((mFP32->AccuracyResult - maximalAccuracy) > (FLAGS_threshold / 100)) {
+                slog::info << "Accuracy drop with all layers converted to Int8 exceeds the required threshold\n";
+                cout << "FP32 Accuracy: " << OUTPUT_FLOATING(100.0 * mFP32->AccuracyResult) << "% vs " <<
+                    "all Int8 layers Accuracy: " << OUTPUT_FLOATING(100.0 * maximalAccuracy) << "%, " <<
+                    "threshold for activation statistics: " << bestThreshold << "%" << std::endl;
+                slog::info << "Collecting intermediate per-layer accuracy drop" << slog::endl;
+                // getting statistics on accuracy drop by layers
+                calibrator->collectByLayerStatistic(statMap);
+                processor->Process();
+                // starting to reduce the number of layers being converted to Int8
+                std::map<std::string, float> layersAccuracyDrop = calibrator->layersAccuracyDrop();
+
+                std::map<float, std::string> orderedLayersAccuracyDrop;
+                for (auto d : layersAccuracyDrop) {
+                    orderedLayersAccuracyDrop[d.second] = d.first;
+                    layersToInt8[d.first] = true;
+                }
+                std::map<float, std::string>::const_reverse_iterator it = orderedLayersAccuracyDrop.crbegin();
+
+                shared_ptr<Processor::InferenceMetrics> pIM_I8;
+                const CalibrationMetrics *mI8;
+                while (it != orderedLayersAccuracyDrop.crend() && bAccuracy == false) {
+                    slog::info << "Returning '" << it->second << "' to FP32 precision, starting validation\n";
+                    layersToInt8[it->second] = false;
+                    calibrator->validateInt8Config(statMap, layersToInt8);
+                    pIM_I8 = processor->Process();
+                    mI8 = dynamic_cast<const CalibrationMetrics *>(pIM_I8.get());
+                    maximalAccuracy = mI8->AccuracyResult;
+                    if ((mFP32->AccuracyResult - maximalAccuracy) > (FLAGS_threshold / 100)) {
+                        cout << "FP32 Accuracy: " << OUTPUT_FLOATING(100.0 * mFP32->AccuracyResult) << "% vs " <<
+                            "current Int8 configuration Accuracy: " << OUTPUT_FLOATING(100.0 * maximalAccuracy) << "%" << std::endl;
+                    } else {
+                        bAccuracy = true;
+                    }
+                    ++it;
+                }
+            } else {
+                bAccuracy = true;
+            }
+
+            if (bAccuracy) {
+                slog::info << "Achieved an accuracy drop within the required threshold\n";
+                cout << "FP32 accuracy: " << OUTPUT_FLOATING(100.0 * mFP32->AccuracyResult) << "% vs " <<
+                    "current Int8 configuration accuracy: " << OUTPUT_FLOATING(100.0 * maximalAccuracy) << "% " <<
+                    "with threshold for activation statistic: " << bestThreshold << "%" << std::endl;
+                std::string outModelName = FLAGS_output.empty() ? fileNameNoExt(FLAGS_m) + "_i8" : fileNameNoExt(FLAGS_output);
+                SaveCalibratedIR(FLAGS_m, outModelName, layersToInt8, statMap);
+            } else {
+                slog::info << "The required accuracy-drop threshold cannot be achieved with any Int8 configuration\n";
+            }
+        } else {
+            std::cout << "Collected activation statistics, writing maximum values to IR" << std::endl;
+            statMap = calibrator->getStatistic(100.0f);
+            std::string outModelName = FLAGS_output.empty() ? fileNameNoExt(FLAGS_m) + "_i8" : fileNameNoExt(FLAGS_output);
+            SaveCalibratedIR(FLAGS_m, outModelName, layersToInt8, statMap);
+        }
+
+        if (dumper.dumpEnabled()) {
+            slog::info << "Dump file generated: " << dumper.getFilename() << slog::endl;
+        }
+    } catch (const InferenceEngineException& ex) {
+        slog::err << "Inference problem: \n" << ex.what() << slog::endl;
+        return 1;
+    } catch (const UserException& ex) {
+        slog::err << "Input problem: \n" << ex.what() << slog::endl;
+        showUsage();
+        return ex.exitCode();
+    } catch (const UserExceptions& ex) {
+        if (ex.list().size() == 1) {
+            slog::err << "Input problem: " << ex.what() << slog::endl;
+            showUsage();
+            return ex.list().begin()->exitCode();
+        } else {
+            slog::err << "Input problems: \n" << ex.what() << slog::endl;
+            showUsage();
+            return ex.list().begin()->exitCode();
+        }
+    }
+    return 0;
+}
diff --git a/inference-engine/samples/calibration_tool/network_serializer.cpp b/inference-engine/samples/calibration_tool/network_serializer.cpp
new file mode 100644
index 000000000..dd245d3ce
--- /dev/null
+++ b/inference-engine/samples/calibration_tool/network_serializer.cpp
@@ -0,0 +1,381 @@
+// Copyright (C) 2018 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <fstream>
+#include <map>
+#include <vector>
+#include <string>
+#include <ie_precision.hpp>
+#include "details/ie_cnn_network_tools.h"
+#include "details/caseless.hpp"
+#include "ie_layers_property.hpp"
+#include "network_serializer.h"
+#include "../common/samples/common.hpp"
+
+using namespace InferenceEngine;
+using namespace details;
+
+template<typename T>
+std::string arrayToIRProperty(const T& property) {
+    std::string sProperty;
+    for (size_t i = 0; i < property.size(); i++) {
+        sProperty = sProperty + std::to_string(property[i]) +
+                    std::string((i != property.size() - 1) ? "," : "");
+    }
+    return sProperty;
+}
+
+template<typename T>
+std::string arrayRevertToIRProperty(const T& property) {
+    std::string sProperty;
+    for (size_t i = 0; i < property.size(); i++) {
+        sProperty = sProperty + std::to_string(property[property.size() - i - 1]) +
+                    std::string((i != property.size() - 1) ? "," : "");
+    }
+    return sProperty;
+}
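+
+// For illustration (derived from the two helpers above): for a property {3, 5},
+// arrayToIRProperty returns "3,5" and arrayRevertToIRProperty returns "5,3".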
"," : ""); + } + return sProperty; +} + + +void CNNNetworkSerializer::Serialize(const std::string &xmlPath, const std::string &binPath, + ICNNNetwork &network) { + std::ofstream ofsBin(binPath, std::ofstream::out | std::ofstream::binary); + + pugi::xml_document doc; + + pugi::xml_node net = doc.append_child("net"); + + char name[1024]; + network.getName(name, 1024); + + net.append_attribute("name").set_value(name); + net.append_attribute("version").set_value("3"); + net.append_attribute("batch").set_value("1"); + + pugi::xml_node layers = net.append_child("layers"); + + size_t dataOffset = 0; + + std::string dataName = "data"; + + std::vector<CNNLayerPtr> ordered; + + ordered = CNNNetSortTopologically(network); + + std::map<CNNLayer::Ptr, int> matching; + for (size_t i = 0; i < ordered.size(); i++) { + matching[ordered[i]] = i; + } + + for (size_t i = 0; i < ordered.size(); i++) { + CNNLayerPtr node = ordered[i]; + + pugi::xml_node layer = layers.append_child("layer"); + Precision precision = node->precision; + layer.append_attribute("name").set_value(node->name.c_str()); + layer.append_attribute("type").set_value(node->type.c_str()); + layer.append_attribute("precision").set_value(precision.name()); + layer.append_attribute("id").set_value(i); + + updateStdLayerParams(node); + + auto ¶ms = node->params; + + if (params.size()) { + pugi::xml_node data = layer.append_child(dataName.c_str()); + + for (auto it : params) { + data.append_attribute(it.first.c_str()).set_value(it.second.c_str()); + } + } + + if (node->insData.size()) { + pugi::xml_node input = layer.append_child("input"); + + for (size_t iport = 0; iport < node->insData.size(); iport++) { + DataPtr d = node->insData[iport].lock(); + pugi::xml_node port = input.append_child("port"); + + port.append_attribute("id").set_value(iport); + + for (auto dim : d->getDims()) { + port.append_child("dim").text().set(dim); + } + } + } + if (node->outData.size()) { + pugi::xml_node input = layer.append_child("output"); + for (size_t oport = 0; oport < node->outData.size(); oport++) { + pugi::xml_node port = input.append_child("port"); + + port.append_attribute("id").set_value(node->insData.size() + oport); + + for (auto dim : node->outData[oport]->getDims()) { + port.append_child("dim").text().set(dim); + } + } + } + if (node->blobs.size()) { + auto blobsNode = layer.append_child("blobs"); + for (auto dataIt : node->blobs) { + const char *dataPtr = dataIt.second->buffer().as<char*>(); + + size_t dataSize = dataIt.second->byteSize(); + pugi::xml_node data = blobsNode.append_child(dataIt.first.c_str()); + data.append_attribute("offset").set_value(dataOffset); + data.append_attribute("size").set_value(dataSize); + + dataOffset += dataSize; + ofsBin.write(dataPtr, dataSize); + } + } + } + + pugi::xml_node edges = net.append_child("edges"); + + for (size_t i = 0; i < ordered.size(); i++) { + CNNLayer::Ptr node = ordered[i]; + + if (node->outData.size()) { + auto itFrom = matching.find(node); + if (itFrom == matching.end()) { + THROW_IE_EXCEPTION << "Internal error, cannot find " << node->name << " in matching container during serialization of IR"; + } + for (size_t oport = 0; oport < node->outData.size(); oport++) { + DataPtr outData = node->outData[oport]; + for (auto inputTo : outData->inputTo) { + auto itTo = matching.find(inputTo.second); + if (itTo == matching.end()) { + THROW_IE_EXCEPTION << "Broken edge form layer " << node->name << " to layer " << inputTo.first<< "during serialization of IR"; + } + + size_t foundPort = -1; + for (size_t iport 
+                        if (inputTo.second->insData[iport].lock() == outData) {
+                            foundPort = iport;
+                        }
+                    }
+                    if (foundPort == static_cast<size_t>(-1)) {
+                        THROW_IE_EXCEPTION << "Broken edge from layer to parent, cannot find parent " << outData->name << " for layer " << inputTo.second->name
+                                           << "\ninitial layer for edge output " << node->name;
+                    }
+                    pugi::xml_node edge = edges.append_child("edge");
+
+                    edge.append_attribute("from-layer").set_value(itFrom->second);
+                    edge.append_attribute("from-port").set_value(oport + node->insData.size());
+
+                    edge.append_attribute("to-layer").set_value(itTo->second);
+                    edge.append_attribute("to-port").set_value(foundPort);
+                }
+            }
+        }
+    }
+
+    InputsDataMap inputInfo;
+    network.getInputsInfo(inputInfo);
+
+    // assuming that we have preprocessing only for one input
+    for (auto ii : inputInfo) {
+        auto pp = ii.second->getPreProcess();
+        size_t nInChannels = pp.getNumberOfChannels();
+        if (nInChannels) {
+            pugi::xml_node preproc = net.append_child("pre-process");
+
+            preproc.append_attribute("reference-layer-name").set_value(ii.first.c_str());
+            preproc.append_attribute("mean-precision").set_value(Precision(Precision::FP32).name());
+
+            for (size_t ch = 0; ch < nInChannels; ch++) {
+                PreProcessChannel::Ptr &preProcessChannel = pp[ch];
+                auto channel = preproc.append_child("channel");
+                channel.append_attribute("id").set_value(ch);
+
+                auto mean = channel.append_child("mean");
+
+                if (!preProcessChannel->meanData) {
+                    mean.append_attribute("value").set_value(preProcessChannel->meanValue);
+                } else {
+                    THROW_IE_EXCEPTION << "Mean data is not supported yet for serialization of the model";
+                }
+            }
+        }
+    }
+
+    // adding statistics to the file if they exist
+    ICNNNetworkStats* netNodesStats = nullptr;
+    auto stats = net.append_child("statistics");
+    network.getStats(&netNodesStats, nullptr);
+    NetworkStatsMap statsmap = netNodesStats ? netNodesStats->getNodesStats() : NetworkStatsMap();
+
+    auto joinCommas = [&](std::vector<float>& v) -> std::string {
+        std::string res;
+
+        for (size_t i = 0; i < v.size(); ++i) {
+            res += std::to_string(v[i]);
+            if (i < v.size() - 1) {
+                res += ", ";
+            }
+        }
+
+        return res;
+    };
+
+    for (auto itStats : statsmap) {
+        auto layer = stats.append_child("layer");
+
+        layer.append_child("name").text().set(itStats.first.c_str());
+
+        layer.append_child("min").text().set(joinCommas(itStats.second->_minOutputs).c_str());
+        layer.append_child("max").text().set(joinCommas(itStats.second->_maxOutputs).c_str());
+    }
+
+    doc.save_file(xmlPath.c_str());
+}
+
+void CNNNetworkSerializer::updateStdLayerParams(CNNLayer::Ptr layer) {
+    auto layerPtr = layer.get();
+    auto type = layer->type;
+    auto &params = layer->params;
+
+    if (CaselessEq<std::string>()(layer->type, "power")) {
+        PowerLayer *lr = dynamic_cast<PowerLayer *>(layerPtr);
+
+        params["scale"] = std::to_string(lr->scale);
+        params["shift"] = std::to_string(lr->offset);
+        params["power"] = std::to_string(lr->power);
+    } else if (CaselessEq<std::string>()(layer->type, "convolution") ||
+               CaselessEq<std::string>()(layer->type, "deconvolution")) {
+        ConvolutionLayer *lr = dynamic_cast<ConvolutionLayer *>(layerPtr);
+
+        params["kernel"] = arrayRevertToIRProperty(lr->_kernel);
+        params["pads_begin"] = arrayRevertToIRProperty(lr->_padding);
+        params["pads_end"] = arrayRevertToIRProperty(lr->_pads_end);
+        params["strides"] = arrayRevertToIRProperty(lr->_stride);
+        params["dilations"] = arrayRevertToIRProperty(lr->_dilation);
+        params["output"] = std::to_string(lr->_out_depth);
+        params["group"] = std::to_string(lr->_group);
+    } else if (CaselessEq<std::string>()(layer->type, "relu")) {
+        ReLULayer *lr = dynamic_cast<ReLULayer *>(layerPtr);
+        if (lr->negative_slope != 0.0f) {
+            params["negative_slope"] = std::to_string(lr->negative_slope);
+        }
+    } else if (CaselessEq<std::string>()(layer->type, "norm") ||
+               CaselessEq<std::string>()(layer->type, "lrn")) {
+        NormLayer *lr = dynamic_cast<NormLayer *>(layerPtr);
+
+        params["alpha"] = std::to_string(lr->_alpha);
+        params["beta"] = std::to_string(lr->_beta);
+        params["local-size"] = std::to_string(lr->_size);
+        params["region"] = lr->_isAcrossMaps ? "across" : "same";
+    } else if (CaselessEq<std::string>()(layer->type, "pooling")) {
+        PoolingLayer *lr = dynamic_cast<PoolingLayer *>(layerPtr);
+
+        params["kernel"] = arrayRevertToIRProperty(lr->_kernel);
+        params["pads_begin"] = arrayRevertToIRProperty(lr->_padding);
+        params["pads_end"] = arrayRevertToIRProperty(lr->_pads_end);
+        params["strides"] = arrayRevertToIRProperty(lr->_stride);
+
+        switch (lr->_type) {
+        case PoolingLayer::MAX:
+            params["pool-method"] = "max";
+            break;
+        case PoolingLayer::AVG:
+            params["pool-method"] = "avg";
+            break;
+
+        default:
+            THROW_IE_EXCEPTION << "Found unsupported pooling method: " << lr->_type;
+        }
+    } else if (CaselessEq<std::string>()(layer->type, "split")) {
+        SplitLayer *lr = dynamic_cast<SplitLayer *>(layerPtr);
+        params["axis"] = std::to_string(lr->_axis);
+    } else if (CaselessEq<std::string>()(layer->type, "concat")) {
+        ConcatLayer *lr = dynamic_cast<ConcatLayer *>(layerPtr);
+        params["axis"] = std::to_string(lr->_axis);
+    } else if (CaselessEq<std::string>()(layer->type, "FullyConnected") ||
+               CaselessEq<std::string>()(layer->type, "InnerProduct")) {
+        FullyConnectedLayer *lr = dynamic_cast<FullyConnectedLayer *>(layerPtr);
+        params["out-size"] = std::to_string(lr->_out_num);
+    } else if (CaselessEq<std::string>()(layer->type, "softmax")) {
+        SoftMaxLayer *lr = dynamic_cast<SoftMaxLayer *>(layerPtr);
+        params["axis"] = std::to_string(lr->axis);
+    } else if (CaselessEq<std::string>()(layer->type, "reshape")) {
+        // need to add support of the flatten layer here if it is created from API
+        ReshapeLayer *lr = dynamic_cast<ReshapeLayer *>(layerPtr);
+        params["axis"] = std::to_string(lr->axis);
+        params["num_axes"] = std::to_string(lr->num_axes);
+        params["dim"] = arrayToIRProperty(lr->shape);
+    } else if (CaselessEq<std::string>()(layer->type, "Eltwise")) {
+        EltwiseLayer *lr = dynamic_cast<EltwiseLayer *>(layerPtr);
+
+        std::string op;
+
+        switch (lr->_operation) {
+        case EltwiseLayer::Sum:
+            op = "sum";
+            break;
+        case EltwiseLayer::Prod:
+            op = "prod";
+            break;
+        case EltwiseLayer::Max:
+            op = "max";
+            break;
+        default:
+            break;
+        }
+
+        params["operation"] = op;
+    } else if (CaselessEq<std::string>()(layer->type, "scaleshift")) {
+        ScaleShiftLayer *lr = dynamic_cast<ScaleShiftLayer *>(layerPtr);
+        params["broadcast"] = std::to_string(lr->_broadcast);
+    } else if (CaselessEq<std::string>()(layer->type, "crop")) {
+        CropLayer *lr = dynamic_cast<CropLayer *>(layerPtr);
+        params["axis"] = arrayToIRProperty(lr->axis);
+        params["offset"] = arrayToIRProperty(lr->offset);
+        params["dim"] = arrayToIRProperty(lr->dim);
+    } else if (CaselessEq<std::string>()(layer->type, "tile")) {
+        TileLayer *lr = dynamic_cast<TileLayer *>(layerPtr);
+        params["axis"] = std::to_string(lr->axis);
+        params["tiles"] = std::to_string(lr->tiles);
+    } else if (CaselessEq<std::string>()(layer->type, "prelu")) {
+        PReLULayer *lr = dynamic_cast<PReLULayer *>(layerPtr);
params["channel_shared"] = std::to_string(lr->_channel_shared); + } else if (CaselessEq<std::string>()(layer->type, "clamp")) { + ClampLayer *lr = dynamic_cast<ClampLayer *>(layerPtr); + params["min"] = std::to_string(lr->min_value); + params["max"] = std::to_string(lr->max_value); + } else if (CaselessEq<std::string>()(layer->type, "BatchNormalization")) { + BatchNormalizationLayer *lr = dynamic_cast<BatchNormalizationLayer *>(layerPtr); + params["epsilon"] = std::to_string(lr->epsilon); + } else if (CaselessEq<std::string>()(layer->type, "grn")) { + GRNLayer *lr = dynamic_cast<GRNLayer *>(layerPtr); + params["bias"] = std::to_string(lr->bias); + } else if (CaselessEq<std::string>()(layer->type, "mvn")) { + MVNLayer *lr = dynamic_cast<MVNLayer *>(layerPtr); + params["across_channels"] = std::to_string(lr->across_channels); + params["normalize_variance"] = std::to_string(lr->normalize); + } else if (CaselessEq<std::string>()(layer->type, "rnn") || + CaselessEq<std::string>()(layer->type, "TensorIterator") || + CaselessEq<std::string>()(layer->type, "LSTMCell")) { + THROW_IE_EXCEPTION << "Not covered layers for writing to IR"; + } + + if (layer->params.find("quantization_level") != layer->params.end()) { + params["quantization_level"] = layer->params["quantization_level"]; + } + + + // update of weightable layers + WeightableLayer *pwlayer = dynamic_cast<WeightableLayer *>(layerPtr); + if (pwlayer) { + if (pwlayer->_weights) { + pwlayer->blobs["weights"] = pwlayer->_weights; + } + if (pwlayer->_biases) { + pwlayer->blobs["biases"] = pwlayer->_biases; + } + } +}
\ No newline at end of file
diff --git a/inference-engine/samples/calibration_tool/network_serializer.h b/inference-engine/samples/calibration_tool/network_serializer.h
new file mode 100644
index 000000000..d0b91ae58
--- /dev/null
+++ b/inference-engine/samples/calibration_tool/network_serializer.h
@@ -0,0 +1,21 @@
+// Copyright (C) 2018 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "inference_engine.hpp"
+#include <pugixml/pugixml.hpp>
+#include <string>
+
+/** Class for serializing a model represented as ICNNNetwork to disk
+ */
+class CNNNetworkSerializer {
+public:
+    void Serialize(const std::string &xmlPath, const std::string &binPath,
+                   InferenceEngine::ICNNNetwork& network);
+
+protected:
+    void updateStdLayerParams(InferenceEngine::CNNLayer::Ptr layer);
+};
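A minimal usage sketch of the serializer declared above (not part of this patch; the file names are hypothetical placeholders, and the calls mirror SaveCalibratedIR in main.cpp):

// Read an existing IR pair and write it back through CNNNetworkSerializer.
#include "inference_engine.hpp"
#include "network_serializer.h"

int main() {
    InferenceEngine::CNNNetReader reader;
    reader.ReadNetwork("model.xml");   // parse the topology (placeholder path)
    reader.ReadWeights("model.bin");   // attach the weights

    CNNNetworkSerializer serializer;
    // Produces model_copy.xml plus model_copy.bin, as SaveCalibratedIR does
    serializer.Serialize("model_copy.xml", "model_copy.bin", reader.getNetwork());
    return 0;
}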